[llvm] r342892 - [X86] Split WriteIMul into 8/16/32/64 implementations (PR36931)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 24 08:21:57 PDT 2018
Author: rksimon
Date: Mon Sep 24 08:21:57 2018
New Revision: 342892
URL: http://llvm.org/viewvc/llvm-project?rev=342892&view=rev
Log:
[X86] Split WriteIMul into 8/16/32/64 implementations (PR36931)
Split WriteIMul by size and also by IMUL multiply-by-imm and multiply-by-reg cases.
This removes all the scheduler overrides for GPR multiplies and stops WriteIMulH being ignored for BMI2 MULX instructions.
Modified:
llvm/trunk/lib/Target/X86/X86InstrArithmetic.td
llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
llvm/trunk/lib/Target/X86/X86SchedHaswell.td
llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
llvm/trunk/lib/Target/X86/X86Schedule.td
llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
llvm/trunk/test/CodeGen/X86/bmi2-schedule.ll
llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-bmi2.s
llvm/trunk/test/tools/llvm-mca/X86/Generic/resources-bmi2.s
llvm/trunk/test/tools/llvm-mca/X86/Haswell/resources-bmi2.s
llvm/trunk/test/tools/llvm-mca/X86/SkylakeClient/resources-bmi2.s
llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-bmi2.s
Modified: llvm/trunk/lib/Target/X86/X86InstrArithmetic.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrArithmetic.td?rev=342892&r1=342891&r2=342892&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrArithmetic.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrArithmetic.td Mon Sep 24 08:21:57 2018
@@ -63,18 +63,18 @@ def MUL8r : I<0xF6, MRM4r, (outs), (in
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, GR8:$src)),
- (implicit EFLAGS)]>, Sched<[WriteIMul]>;
+ (implicit EFLAGS)]>, Sched<[WriteIMul8]>;
// AX,DX = AX*GR16
let Defs = [AX,DX,EFLAGS], Uses = [AX], hasSideEffects = 0 in
def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
"mul{w}\t$src",
- []>, OpSize16, Sched<[WriteIMul]>;
+ []>, OpSize16, Sched<[WriteIMul16]>;
// EAX,EDX = EAX*GR32
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], hasSideEffects = 0 in
def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
"mul{l}\t$src",
[/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>,
- OpSize32, Sched<[WriteIMul]>;
+ OpSize32, Sched<[WriteIMul32]>;
// RAX,RDX = RAX*GR64
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], hasSideEffects = 0 in
def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
@@ -89,16 +89,16 @@ def MUL8m : I<0xF6, MRM4m, (outs), (ins
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, (loadi8 addr:$src))),
- (implicit EFLAGS)]>, SchedLoadReg<WriteIMul.Folded>;
+ (implicit EFLAGS)]>, SchedLoadReg<WriteIMul8.Folded>;
// AX,DX = AX*[mem16]
let mayLoad = 1, hasSideEffects = 0 in {
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
- "mul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul.Folded>;
+ "mul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul16.Folded>;
// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
- "mul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul.Folded>;
+ "mul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul32.Folded>;
// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
@@ -110,15 +110,15 @@ let hasSideEffects = 0 in {
// AL,AH = AL*GR8
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>,
- Sched<[WriteIMul]>;
+ Sched<[WriteIMul8]>;
// AX,DX = AX*GR16
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>,
- OpSize16, Sched<[WriteIMul]>;
+ OpSize16, Sched<[WriteIMul16]>;
// EAX,EDX = EAX*GR32
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>,
- OpSize32, Sched<[WriteIMul]>;
+ OpSize32, Sched<[WriteIMul32]>;
// RAX,RDX = RAX*GR64
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", []>,
@@ -128,15 +128,15 @@ let mayLoad = 1 in {
// AL,AH = AL*[mem8]
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
- "imul{b}\t$src", []>, SchedLoadReg<WriteIMul.Folded>;
+ "imul{b}\t$src", []>, SchedLoadReg<WriteIMul8.Folded>;
// AX,DX = AX*[mem16]
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
- "imul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul.Folded>;
+ "imul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul16.Folded>;
// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
- "imul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul.Folded>;
+ "imul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul32.Folded>;
// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
@@ -156,18 +156,18 @@ def IMUL16rr : I<0xAF, MRMSrcReg, (outs
"imul{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag GR16:$src1, GR16:$src2))]>,
- Sched<[WriteIMul]>, TB, OpSize16;
+ Sched<[WriteIMul16Reg]>, TB, OpSize16;
def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
"imul{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag GR32:$src1, GR32:$src2))]>,
- Sched<[WriteIMul]>, TB, OpSize32;
+ Sched<[WriteIMul32Reg]>, TB, OpSize32;
def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"imul{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, GR64:$src2))]>,
- Sched<[WriteIMul64]>, TB;
+ Sched<[WriteIMul64Reg]>, TB;
} // isCommutable
// Register-Memory Signed Integer Multiply
@@ -176,19 +176,19 @@ def IMUL16rm : I<0xAF, MRMSrcMem, (outs
"imul{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag GR16:$src1, (loadi16 addr:$src2)))]>,
- Sched<[WriteIMul.Folded, ReadAfterLd]>, TB, OpSize16;
+ Sched<[WriteIMul16Reg.Folded, ReadAfterLd]>, TB, OpSize16;
def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"imul{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag GR32:$src1, (loadi32 addr:$src2)))]>,
- Sched<[WriteIMul.Folded, ReadAfterLd]>, TB, OpSize32;
+ Sched<[WriteIMul32Reg.Folded, ReadAfterLd]>, TB, OpSize32;
def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"imul{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, (loadi64 addr:$src2)))]>,
- Sched<[WriteIMul64.Folded, ReadAfterLd]>, TB;
+ Sched<[WriteIMul64Reg.Folded, ReadAfterLd]>, TB;
} // Constraints = "$src1 = $dst"
} // Defs = [EFLAGS]
@@ -201,37 +201,37 @@ def IMUL16rri : Ii16<0x69, MRMSrcReg,
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag GR16:$src1, imm:$src2))]>,
- Sched<[WriteIMul]>, OpSize16;
+ Sched<[WriteIMul16Imm]>, OpSize16;
def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
- Sched<[WriteIMul]>, OpSize16;
+ Sched<[WriteIMul16Imm]>, OpSize16;
def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32
(outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag GR32:$src1, imm:$src2))]>,
- Sched<[WriteIMul]>, OpSize32;
+ Sched<[WriteIMul32Imm]>, OpSize32;
def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8
(outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag GR32:$src1, i32immSExt8:$src2))]>,
- Sched<[WriteIMul]>, OpSize32;
+ Sched<[WriteIMul32Imm]>, OpSize32;
def IMUL64rri32 : RIi32S<0x69, MRMSrcReg, // GR64 = GR64*I32
(outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, i64immSExt32:$src2))]>,
- Sched<[WriteIMul64]>;
+ Sched<[WriteIMul64Imm]>;
def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
(outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, i64immSExt8:$src2))]>,
- Sched<[WriteIMul64]>;
+ Sched<[WriteIMul64Imm]>;
// Memory-Integer Signed Integer Multiply
def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
@@ -239,41 +239,41 @@ def IMUL16rmi : Ii16<0x69, MRMSrcMem,
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag (loadi16 addr:$src1), imm:$src2))]>,
- Sched<[WriteIMul.Folded]>, OpSize16;
+ Sched<[WriteIMul16Imm.Folded]>, OpSize16;
def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
(outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag (loadi16 addr:$src1),
i16immSExt8:$src2))]>,
- Sched<[WriteIMul.Folded]>, OpSize16;
+ Sched<[WriteIMul16Imm.Folded]>, OpSize16;
def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
(outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag (loadi32 addr:$src1), imm:$src2))]>,
- Sched<[WriteIMul.Folded]>, OpSize32;
+ Sched<[WriteIMul32Imm.Folded]>, OpSize32;
def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
(outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag (loadi32 addr:$src1),
i32immSExt8:$src2))]>,
- Sched<[WriteIMul.Folded]>, OpSize32;
+ Sched<[WriteIMul32Imm.Folded]>, OpSize32;
def IMUL64rmi32 : RIi32S<0x69, MRMSrcMem, // GR64 = [mem64]*I32
(outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag (loadi64 addr:$src1),
i64immSExt32:$src2))]>,
- Sched<[WriteIMul64.Folded]>;
+ Sched<[WriteIMul64Imm.Folded]>;
def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
(outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag (loadi64 addr:$src1),
i64immSExt8:$src2))]>,
- Sched<[WriteIMul64.Folded]>;
+ Sched<[WriteIMul64Imm.Folded]>;
} // Defs = [EFLAGS]
// unsigned division/remainder
@@ -1299,7 +1299,7 @@ let hasSideEffects = 0 in {
let Predicates = [HasBMI2] in {
let Uses = [EDX] in
- defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteIMul>;
+ defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteIMul32>;
let Uses = [RDX] in
defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteIMul64>, VEX_W;
}
Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=342892&r1=342891&r2=342892&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Mon Sep 24 08:21:57 2018
@@ -108,8 +108,21 @@ def : WriteRes<WriteRMW, [BWPort237,BWPo
// Arithmetic.
defm : BWWriteResPair<WriteALU, [BWPort0156], 1>; // Simple integer ALU op.
defm : BWWriteResPair<WriteADC, [BWPort06], 1>; // Integer ALU + flags op.
-defm : BWWriteResPair<WriteIMul, [BWPort1], 3>; // Integer multiplication.
-defm : BWWriteResPair<WriteIMul64, [BWPort1], 3>; // Integer 64-bit multiplication.
+
+// Integer multiplication.
+defm : BWWriteResPair<WriteIMul8, [BWPort1], 3>;
+defm : BWWriteResPair<WriteIMul16, [BWPort1,BWPort06,BWPort0156], 4, [1,1,2], 4>;
+defm : X86WriteRes<WriteIMul16Imm, [BWPort1,BWPort0156], 4, [1,1], 2>;
+defm : X86WriteRes<WriteIMul16ImmLd, [BWPort1,BWPort0156,BWPort23], 8, [1,1,1], 3>;
+defm : BWWriteResPair<WriteIMul16Reg, [BWPort1], 3>;
+defm : BWWriteResPair<WriteIMul32, [BWPort1,BWPort06,BWPort0156], 4, [1,1,1], 3>;
+defm : BWWriteResPair<WriteIMul32Imm, [BWPort1], 3>;
+defm : BWWriteResPair<WriteIMul32Reg, [BWPort1], 3>;
+defm : BWWriteResPair<WriteIMul64, [BWPort1,BWPort5], 4, [1,1], 2>;
+defm : BWWriteResPair<WriteIMul64Imm, [BWPort1], 3>;
+defm : BWWriteResPair<WriteIMul64Reg, [BWPort1], 3>;
+def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+
defm : BWWriteResPair<WriteDiv8, [BWPort0, BWDivider], 25, [1, 10]>;
defm : BWWriteResPair<WriteDiv16, [BWPort0, BWDivider], 25, [1, 10]>;
defm : BWWriteResPair<WriteDiv32, [BWPort0, BWDivider], 25, [1, 10]>;
@@ -126,7 +139,6 @@ defm : X86WriteRes<WriteBSWAP64, [BWPo
defm : X86WriteRes<WriteXCHG, [BWPort0156], 2, [3], 3>;
defm : BWWriteResPair<WriteCRC32, [BWPort1], 3>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
def : WriteRes<WriteLEA, [BWPort15]>; // LEA instructions can't fold loads.
@@ -735,13 +747,6 @@ def: InstRW<[BWWriteResGroup27], (instrs
def: InstRW<[BWWriteResGroup27], (instregex "P(DEP|EXT)(32|64)rr",
"(V?)CVTDQ2PS(Y?)rr")>;
-def BWWriteResGroup27_16 : SchedWriteRes<[BWPort1, BWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[BWWriteResGroup27_16], (instrs IMUL16rri, IMUL16rri8)>;
-
def BWWriteResGroup28 : SchedWriteRes<[BWPort5]> {
let Latency = 3;
let NumMicroOps = 1;
@@ -818,8 +823,7 @@ def BWWriteResGroup42 : SchedWriteRes<[B
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup42], (instrs IMUL64r, MUL64r, MULX64rr,
- MMX_CVTPI2PDirr)>;
+def: InstRW<[BWWriteResGroup42], (instrs MMX_CVTPI2PDirr)>;
def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVT(T?)PD2PIirr",
"MMX_CVT(T?)PS2PIirr",
"(V?)CVTDQ2PDrr",
@@ -830,13 +834,6 @@ def: InstRW<[BWWriteResGroup42], (instre
"(V?)CVTSI2SSrr",
"(V?)CVT(T?)PD2DQrr")>;
-def BWWriteResGroup42_16 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[BWWriteResGroup42_16], (instrs IMUL16r, MUL16r)>;
-
def BWWriteResGroup43 : SchedWriteRes<[BWPort0,BWPort4,BWPort237]> {
let Latency = 4;
let NumMicroOps = 3;
@@ -902,13 +899,6 @@ def BWWriteResGroup51 : SchedWriteRes<[B
}
def: InstRW<[BWWriteResGroup51], (instregex "STR(16|32|64)r")>;
-def BWWriteResGroup52 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[BWWriteResGroup52], (instrs IMUL32r, MUL32r, MULX32rr)>;
-
def BWWriteResGroup54 : SchedWriteRes<[BWPort6,BWPort0156]> {
let Latency = 5;
let NumMicroOps = 5;
@@ -1133,20 +1123,6 @@ def: InstRW<[BWWriteResGroup91], (instrs
VCVTDQ2PSrm)>;
def: InstRW<[BWWriteResGroup91], (instregex "P(DEP|EXT)(32|64)rm")>;
-def BWWriteResGroup91_16 : SchedWriteRes<[BWPort1, BWPort0156, BWPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[BWWriteResGroup91_16], (instrs IMUL16rmi, IMUL16rmi8)>;
-
-def BWWriteResGroup91_16_2 : SchedWriteRes<[BWPort1, BWPort06, BWPort0156, BWPort23]> {
- let Latency = 9;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[BWWriteResGroup91_16_2], (instrs IMUL16m, MUL16m)>;
-
def BWWriteResGroup92 : SchedWriteRes<[BWPort5,BWPort23]> {
let Latency = 8;
let NumMicroOps = 2;
@@ -1220,8 +1196,7 @@ def BWWriteResGroup107 : SchedWriteRes<[
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[BWWriteResGroup107], (instrs IMUL64m, MUL64m, MULX64rm,
- CVTPD2PSrm,
+def: InstRW<[BWWriteResGroup107], (instrs CVTPD2PSrm,
CVTPD2DQrm,
CVTTPD2DQrm,
MMX_CVTPI2PDirm)>;
@@ -1273,13 +1248,6 @@ def BWWriteResGroup120 : SchedWriteRes<[
}
def: InstRW<[BWWriteResGroup120], (instregex "CVTTSS2SI64rm")>;
-def BWWriteResGroup121 : SchedWriteRes<[BWPort1,BWPort23,BWPort06,BWPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[BWWriteResGroup121], (instrs IMUL32m, MUL32m, MULX32rm)>;
-
def BWWriteResGroup122_1 : SchedWriteRes<[BWPort0,BWFPDivider]> {
let Latency = 11;
let NumMicroOps = 1;
Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=342892&r1=342891&r2=342892&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Mon Sep 24 08:21:57 2018
@@ -121,8 +121,20 @@ def : WriteRes<WriteZero, []>;
// Arithmetic.
defm : HWWriteResPair<WriteALU, [HWPort0156], 1>;
defm : HWWriteResPair<WriteADC, [HWPort06, HWPort0156], 2, [1,1], 2>;
-defm : HWWriteResPair<WriteIMul, [HWPort1], 3>;
-defm : HWWriteResPair<WriteIMul64, [HWPort1], 3>;
+
+// Integer multiplication.
+defm : HWWriteResPair<WriteIMul8, [HWPort1], 3>;
+defm : HWWriteResPair<WriteIMul16, [HWPort1,HWPort06,HWPort0156], 4, [1,1,2], 4>;
+defm : X86WriteRes<WriteIMul16Imm, [HWPort1,HWPort0156], 4, [1,1], 2>;
+defm : X86WriteRes<WriteIMul16ImmLd, [HWPort1,HWPort0156,HWPort23], 8, [1,1,1], 3>;
+defm : HWWriteResPair<WriteIMul16Reg, [HWPort1], 3>;
+defm : HWWriteResPair<WriteIMul32, [HWPort1,HWPort06,HWPort0156], 4, [1,1,1], 3>;
+defm : HWWriteResPair<WriteIMul32Imm, [HWPort1], 3>;
+defm : HWWriteResPair<WriteIMul32Reg, [HWPort1], 3>;
+defm : HWWriteResPair<WriteIMul64, [HWPort1,HWPort6], 4, [1,1], 2>;
+defm : HWWriteResPair<WriteIMul64Imm, [HWPort1], 3>;
+defm : HWWriteResPair<WriteIMul64Reg, [HWPort1], 3>;
+def : WriteRes<WriteIMulH, []> { let Latency = 3; }
defm : X86WriteRes<WriteBSWAP32, [HWPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [HWPort06, HWPort15], 2, [1,1], 2>;
@@ -130,8 +142,6 @@ defm : X86WriteRes<WriteCMPXCHG,[HWPort0
defm : X86WriteRes<WriteCMPXCHGRMW,[HWPort23,HWPort06,HWPort0156,HWPort237,HWPort4], 9, [1,2,1,1,1], 6>;
defm : X86WriteRes<WriteXCHG, [HWPort0156], 2, [3], 3>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; }
-
// Integer shifts and rotates.
defm : HWWriteResPair<WriteShift, [HWPort06], 1>;
defm : HWWriteResPair<WriteShiftCL, [HWPort06, HWPort0156], 3, [2,1], 3>;
@@ -957,20 +967,6 @@ def HWWriteResGroup12 : SchedWriteRes<[H
def: InstRW<[HWWriteResGroup12], (instrs MMX_CVTPI2PSirm)>;
def: InstRW<[HWWriteResGroup12], (instregex "P(DEP|EXT)(32|64)rm")>;
-def HWWriteResGroup12_1 : SchedWriteRes<[HWPort1,HWPort0156,HWPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[HWWriteResGroup12_1], (instrs IMUL16rmi, IMUL16rmi8)>;
-
-def HWWriteResGroup12_2 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156,HWPort23]> {
- let Latency = 9;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[HWWriteResGroup12_2], (instrs IMUL16m, MUL16m)>;
-
def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> {
let Latency = 6;
let NumMicroOps = 2;
@@ -1221,13 +1217,6 @@ def: InstRW<[HWWriteResGroup50], (instrs
def: InstRW<[HWWriteResGroup50], (instregex "P(DEP|EXT)(32|64)rr",
"(V?)CVTDQ2PS(Y?)rr")>;
-def HWWriteResGroup50_16i : SchedWriteRes<[HWPort1, HWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup50_16i], (instrs IMUL16rri, IMUL16rri8)>;
-
def HWWriteResGroup51 : SchedWriteRes<[HWPort5]> {
let Latency = 3;
let NumMicroOps = 1;
@@ -1369,20 +1358,6 @@ def: InstRW<[HWWriteResGroup73], (instre
"(V?)CVTSI2SSrr",
"(V?)CVT(T?)PD2DQrr")>;
-def HWWriteResGroup74 : SchedWriteRes<[HWPort1,HWPort6]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup74], (instrs IMUL64r, MUL64r, MULX64rr)>;
-
-def HWWriteResGroup74_16 : SchedWriteRes<[HWPort1, HWPort06, HWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[HWWriteResGroup74_16], (instrs IMUL16r, MUL16r)>;
-
def HWWriteResGroup75 : SchedWriteRes<[HWPort1,HWPort23]> {
let Latency = 11;
let NumMicroOps = 3;
@@ -1430,13 +1405,6 @@ def: InstRW<[HWWriteResGroup78_1], (inst
CVTSD2SSrm,
VCVTSD2SSrm)>;
-def HWWriteResGroup79 : SchedWriteRes<[HWPort1,HWPort6,HWPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[HWWriteResGroup79], (instrs IMUL64m, MUL64m, MULX64rm)>;
-
def HWWriteResGroup80 : SchedWriteRes<[HWPort5,HWPort23,HWPort015]> {
let Latency = 9;
let NumMicroOps = 3;
@@ -1517,13 +1485,6 @@ def HWWriteResGroup94 : SchedWriteRes<[H
}
def: InstRW<[HWWriteResGroup94], (instregex "STR(16|32|64)r")>;
-def HWWriteResGroup95 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> {
- let Latency = 4;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[HWWriteResGroup95], (instrs IMUL32r, MUL32r, MULX32rr)>;
-
def HWWriteResGroup97 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> {
let Latency = 10;
let NumMicroOps = 4;
@@ -1531,13 +1492,6 @@ def HWWriteResGroup97 : SchedWriteRes<[H
}
def: InstRW<[HWWriteResGroup97], (instregex "CVTTSS2SI64rm")>;
-def HWWriteResGroup98 : SchedWriteRes<[HWPort1,HWPort23,HWPort06,HWPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[HWWriteResGroup98], (instrs IMUL32m, MUL32m, MULX32rm)>;
-
def HWWriteResGroup99 : SchedWriteRes<[HWPort6,HWPort0156]> {
let Latency = 5;
let NumMicroOps = 5;
Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=342892&r1=342891&r2=342892&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Mon Sep 24 08:21:57 2018
@@ -109,10 +109,21 @@ def : WriteRes<WriteZero, []>;
// Arithmetic.
defm : SBWriteResPair<WriteALU, [SBPort015], 1>;
defm : SBWriteResPair<WriteADC, [SBPort05,SBPort015], 2, [1,1], 2>;
-defm : SBWriteResPair<WriteIMul, [SBPort1], 3>;
-defm : SBWriteResPair<WriteIMul64, [SBPort1], 3>;
-defm : X86WriteRes<WriteXCHG, [SBPort015], 2, [3], 3>;
+defm : SBWriteResPair<WriteIMul8, [SBPort1], 3>;
+defm : SBWriteResPair<WriteIMul16, [SBPort1,SBPort05,SBPort015], 4, [1,1,2], 4>;
+defm : X86WriteRes<WriteIMul16Imm, [SBPort1,SBPort015], 4, [1,1], 2>;
+defm : X86WriteRes<WriteIMul16ImmLd, [SBPort1,SBPort015,SBPort23], 8, [1,1,1], 3>;
+defm : SBWriteResPair<WriteIMul16Reg, [SBPort1], 3>;
+defm : SBWriteResPair<WriteIMul32, [SBPort1,SBPort05,SBPort015], 4, [1,1,1], 3>;
+defm : SBWriteResPair<WriteIMul32Imm, [SBPort1], 3>;
+defm : SBWriteResPair<WriteIMul32Reg, [SBPort1], 3>;
+defm : SBWriteResPair<WriteIMul64, [SBPort1,SBPort0], 4, [1,1], 2>;
+defm : SBWriteResPair<WriteIMul64Imm, [SBPort1], 3>;
+defm : SBWriteResPair<WriteIMul64Reg, [SBPort1], 3>;
+def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+
+defm : X86WriteRes<WriteXCHG, [SBPort015], 2, [3], 3>;
defm : X86WriteRes<WriteBSWAP32, [SBPort1], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SBPort1, SBPort05], 2, [1,1], 2>;
defm : X86WriteRes<WriteCMPXCHG, [SBPort05, SBPort015], 5, [1,3], 4>;
@@ -127,8 +138,6 @@ defm : SBWriteResPair<WriteIDiv16, [SBPo
defm : SBWriteResPair<WriteIDiv32, [SBPort0, SBDivider], 25, [1, 10]>;
defm : SBWriteResPair<WriteIDiv64, [SBPort0, SBDivider], 25, [1, 10]>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; }
-
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [SBPort05, SBPort015], 2, [1, 1], 2>;
defm : X86WriteRes<WriteSHDrrcl,[SBPort05, SBPort015], 4, [3, 1], 4>;
@@ -641,13 +650,6 @@ def SBWriteResGroup21 : SchedWriteRes<[S
}
def: InstRW<[SBWriteResGroup21], (instrs PUSHFS64)>;
-def SBWriteResGroup21_16i : SchedWriteRes<[SBPort1, SBPort015]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup21_16i], (instrs IMUL16rri, IMUL16rri8)>;
-
def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> {
let Latency = 3;
let NumMicroOps = 2;
@@ -677,27 +679,6 @@ def SBWriteResGroup26_2 : SchedWriteRes<
}
def: InstRW<[SBWriteResGroup26_2], (instrs COM_FIPr, COM_FIr, UCOM_FIPr, UCOM_FIr)>;
-def SBWriteResGroup27 : SchedWriteRes<[SBPort0,SBPort1]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup27], (instrs IMUL64r, MUL64r)>;
-
-def SBWriteResGroup27_1 : SchedWriteRes<[SBPort1,SBPort05,SBPort015]> {
- let Latency = 4;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup27_1], (instrs IMUL32r, MUL32r)>;
-
-def SBWriteResGroup27_2 : SchedWriteRes<[SBPort1,SBPort05,SBPort015]> {
- let Latency = 4;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SBWriteResGroup27_2], (instrs IMUL16r, MUL16r)>;
-
def SBWriteResGroup29 : SchedWriteRes<[SBPort1,SBPort015]> {
let Latency = 4;
let NumMicroOps = 2;
@@ -1009,34 +990,6 @@ def SBWriteResGroup93 : SchedWriteRes<[S
}
def: InstRW<[SBWriteResGroup93], (instregex "CVT(T?)(SD|SS)2SI(64)?rm")>;
-def SBWriteResGroup93_1 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup93_1], (instrs IMUL64m, MUL64m)>;
-
-def SBWriteResGroup93_2 : SchedWriteRes<[SBPort1,SBPort23,SBPort05,SBPort015]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SBWriteResGroup93_2], (instrs IMUL32m, MUL32m)>;
-
-def SBWriteResGroup93_3 : SchedWriteRes<[SBPort1,SBPort05,SBPort015,SBPort23]> {
- let Latency = 9;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[SBWriteResGroup93_3], (instrs IMUL16m, MUL16m)>;
-
-def SBWriteResGroup93_4 : SchedWriteRes<[SBPort1,SBPort015,SBPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup93_4], (instrs IMUL16rmi, IMUL16rmi8)>;
-
def SBWriteResGroup95 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> {
let Latency = 9;
let NumMicroOps = 3;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=342892&r1=342891&r2=342892&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Mon Sep 24 08:21:57 2018
@@ -107,8 +107,20 @@ def : WriteRes<WriteRMW, [SKLPort237,SKL
// Arithmetic.
defm : SKLWriteResPair<WriteALU, [SKLPort0156], 1>; // Simple integer ALU op.
defm : SKLWriteResPair<WriteADC, [SKLPort06], 1>; // Integer ALU + flags op.
-defm : SKLWriteResPair<WriteIMul, [SKLPort1], 3>; // Integer multiplication.
-defm : SKLWriteResPair<WriteIMul64, [SKLPort1], 3>; // Integer 64-bit multiplication.
+
+// Integer multiplication.
+defm : SKLWriteResPair<WriteIMul8, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteIMul16, [SKLPort1,SKLPort06,SKLPort0156], 4, [1,1,2], 4>;
+defm : X86WriteRes<WriteIMul16Imm, [SKLPort1,SKLPort0156], 4, [1,1], 2>;
+defm : X86WriteRes<WriteIMul16ImmLd, [SKLPort1,SKLPort0156,SKLPort23], 8, [1,1,1], 3>;
+defm : SKLWriteResPair<WriteIMul16Reg, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteIMul32, [SKLPort1,SKLPort06,SKLPort0156], 4, [1,1,1], 3>;
+defm : SKLWriteResPair<WriteIMul32Imm, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteIMul32Reg, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteIMul64, [SKLPort1,SKLPort5], 4, [1,1], 2>;
+defm : SKLWriteResPair<WriteIMul64Imm, [SKLPort1], 3>;
+defm : SKLWriteResPair<WriteIMul64Reg, [SKLPort1], 3>;
+def : WriteRes<WriteIMulH, []> { let Latency = 3; }
defm : X86WriteRes<WriteBSWAP32, [SKLPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SKLPort06, SKLPort15], 2, [1,1], 2>;
@@ -127,7 +139,6 @@ defm : SKLWriteResPair<WriteIDiv64, [SKL
defm : SKLWriteResPair<WriteCRC32, [SKLPort1], 3>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
def : WriteRes<WriteLEA, [SKLPort15]>; // LEA instructions can't fold loads.
defm : SKLWriteResPair<WriteCMOV, [SKLPort06], 1, [1], 1>; // Conditional move.
@@ -738,13 +749,6 @@ def SKLWriteResGroup29 : SchedWriteRes<[
def: InstRW<[SKLWriteResGroup29], (instregex "PDEP(32|64)rr",
"PEXT(32|64)rr")>;
-def SKLWriteResGroup29_16i : SchedWriteRes<[SKLPort1, SKLPort0156]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup29_16i], (instrs IMUL16rri, IMUL16rri8)>;
-
def SKLWriteResGroup30 : SchedWriteRes<[SKLPort5]> {
let Latency = 3;
let NumMicroOps = 1;
@@ -850,20 +854,6 @@ def SKLWriteResGroup48 : SchedWriteRes<[
def: InstRW<[SKLWriteResGroup48], (instregex "(V?)CVTDQ2PS(Y?)rr",
"(V?)CVT(T?)PS2DQ(Y?)rr")>;
-def SKLWriteResGroup51 : SchedWriteRes<[SKLPort1,SKLPort5]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup51], (instrs IMUL64r, MUL64r, MULX64rr)>;
-
-def SKLWriteResGroup51_16 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
- let Latency = 4;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SKLWriteResGroup51_16], (instrs IMUL16r, MUL16r)>;
-
def SKLWriteResGroup53 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237]> {
let Latency = 4;
let NumMicroOps = 3;
@@ -941,13 +931,6 @@ def SKLWriteResGroup61 : SchedWriteRes<[
}
def: InstRW<[SKLWriteResGroup61], (instregex "STR(16|32|64)r")>;
-def SKLWriteResGroup62 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
- let Latency = 4;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKLWriteResGroup62], (instrs IMUL32r, MUL32r, MULX32rr)>;
-
def SKLWriteResGroup63 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
let Latency = 5;
let NumMicroOps = 5;
@@ -1218,20 +1201,6 @@ def SKLWriteResGroup107 : SchedWriteRes<
def: InstRW<[SKLWriteResGroup107], (instregex "PDEP(32|64)rm",
"PEXT(32|64)rm")>;
-def SKLWriteResGroup107_16 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKLWriteResGroup107_16], (instrs IMUL16rmi, IMUL16rmi8)>;
-
-def SKLWriteResGroup107_16_2 : SchedWriteRes<[SKLPort1, SKLPort06, SKLPort0156, SKLPort23]> {
- let Latency = 9;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[SKLWriteResGroup107_16_2], (instrs IMUL16m, MUL16m)>;
-
def SKLWriteResGroup108 : SchedWriteRes<[SKLPort5,SKLPort23]> {
let Latency = 8;
let NumMicroOps = 2;
@@ -1313,13 +1282,6 @@ def SKLWriteResGroup123 : SchedWriteRes<
def: InstRW<[SKLWriteResGroup123], (instregex "MMX_CVT(T?)PS2PIirm",
"(V?)CVTPS2PDrm")>;
-def SKLWriteResGroup127 : SchedWriteRes<[SKLPort1,SKLPort5,SKLPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKLWriteResGroup127], (instrs IMUL64m, MUL64m, MULX64rm)>;
-
def SKLWriteResGroup128 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
let Latency = 9;
let NumMicroOps = 4;
@@ -1377,13 +1339,6 @@ def SKLWriteResGroup140 : SchedWriteRes<
def: InstRW<[SKLWriteResGroup140], (instrs VPHADDSWYrm,
VPHSUBSWYrm)>;
-def SKLWriteResGroup142 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort06,SKLPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKLWriteResGroup142], (instrs IMUL32m, MUL32m, MULX32rm)>;
-
def SKLWriteResGroup143 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 10;
let NumMicroOps = 8;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=342892&r1=342891&r2=342892&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Mon Sep 24 08:21:57 2018
@@ -107,8 +107,21 @@ def : WriteRes<WriteRMW, [SKXPort237,SKX
// Arithmetic.
defm : SKXWriteResPair<WriteALU, [SKXPort0156], 1>; // Simple integer ALU op.
defm : SKXWriteResPair<WriteADC, [SKXPort06], 1>; // Integer ALU + flags op.
-defm : SKXWriteResPair<WriteIMul, [SKXPort1], 3>; // Integer multiplication.
-defm : SKXWriteResPair<WriteIMul64, [SKXPort1], 3>; // Integer 64-bit multiplication.
+
+// Integer multiplication.
+defm : SKXWriteResPair<WriteIMul8, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteIMul16, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,2], 4>;
+defm : X86WriteRes<WriteIMul16Imm, [SKXPort1,SKXPort0156], 4, [1,1], 2>;
+defm : X86WriteRes<WriteIMul16ImmLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1,1], 3>;
+defm : X86WriteRes<WriteIMul16Reg, [SKXPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteIMul16RegLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1,1], 3>;
+defm : SKXWriteResPair<WriteIMul32, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,1], 3>;
+defm : SKXWriteResPair<WriteIMul32Imm, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteIMul32Reg, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteIMul64, [SKXPort1,SKXPort5], 4, [1,1], 2>;
+defm : SKXWriteResPair<WriteIMul64Imm, [SKXPort1], 3>;
+defm : SKXWriteResPair<WriteIMul64Reg, [SKXPort1], 3>;
+def : WriteRes<WriteIMulH, []> { let Latency = 3; }
defm : X86WriteRes<WriteBSWAP32, [SKXPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SKXPort06, SKXPort15], 2, [1,1], 2>;
@@ -127,7 +140,6 @@ defm : SKXWriteResPair<WriteIDiv64, [SKX
defm : SKXWriteResPair<WriteCRC32, [SKXPort1], 3>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
def : WriteRes<WriteLEA, [SKXPort15]>; // LEA instructions can't fold loads.
defm : SKXWriteResPair<WriteCMOV, [SKXPort06], 1, [1], 1>; // Conditional move.
@@ -778,14 +790,6 @@ def SKXWriteResGroup31 : SchedWriteRes<[
def: InstRW<[SKXWriteResGroup31], (instregex "PDEP(32|64)rr",
"PEXT(32|64)rr")>;
-def SKXWriteResGroup31_16i : SchedWriteRes<[SKXPort1, SKXPort0156]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKXWriteResGroup31_16i], (instrs IMUL16rri, IMUL16rri8)>;
-
-
def SKXWriteResGroup32 : SchedWriteRes<[SKXPort5]> {
let Latency = 3;
let NumMicroOps = 1;
@@ -969,20 +973,6 @@ def: InstRW<[SKXWriteResGroup51], (instr
"VPMOVUSWB(Z|Z128|Z256)rr",
"VPMOVWB(Z|Z128|Z256)rr")>;
-def SKXWriteResGroup52 : SchedWriteRes<[SKXPort1,SKXPort5]> {
- let Latency = 4;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SKXWriteResGroup52], (instrs IMUL64r, MUL64r, MULX64rr)>;
-
-def SKXWriteResGroup52_16 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
- let Latency = 4;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,2];
-}
-def: InstRW<[SKXWriteResGroup52_16], (instrs IMUL16r, MUL16r)>;
-
def SKXWriteResGroup54 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> {
let Latency = 4;
let NumMicroOps = 3;
@@ -1070,13 +1060,6 @@ def SKXWriteResGroup63 : SchedWriteRes<[
}
def: InstRW<[SKXWriteResGroup63], (instregex "STR(16|32|64)r")>;
-def SKXWriteResGroup64 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
- let Latency = 4;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKXWriteResGroup64], (instrs IMUL32r, MUL32r, MULX32rr)>;
-
def SKXWriteResGroup65 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort015]> {
let Latency = 5;
let NumMicroOps = 3;
@@ -1519,20 +1502,6 @@ def SKXWriteResGroup118 : SchedWriteRes<
def: InstRW<[SKXWriteResGroup118], (instregex "PDEP(32|64)rm",
"PEXT(32|64)rm")>;
-def SKXWriteResGroup118_16_1 : SchedWriteRes<[SKXPort1, SKXPort0156, SKXPort23]> {
- let Latency = 8;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKXWriteResGroup118_16_1], (instrs IMUL16rm, IMUL16rmi, IMUL16rmi8)>;
-
-def SKXWriteResGroup118_16_2 : SchedWriteRes<[SKXPort1, SKXPort06, SKXPort0156, SKXPort23]> {
- let Latency = 9;
- let NumMicroOps = 5;
- let ResourceCycles = [1,1,2,1];
-}
-def: InstRW<[SKXWriteResGroup118_16_2], (instrs IMUL16m, MUL16m)>;
-
def SKXWriteResGroup119 : SchedWriteRes<[SKXPort5,SKXPort23]> {
let Latency = 8;
let NumMicroOps = 2;
@@ -1741,13 +1710,6 @@ def SKXWriteResGroup137 : SchedWriteRes<
def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIirm",
"(V?)CVTPS2PDrm")>;
-def SKXWriteResGroup142 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKXWriteResGroup142], (instrs IMUL64m, MUL64m, MULX64rm)>;
-
def SKXWriteResGroup143 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> {
let Latency = 9;
let NumMicroOps = 4;
@@ -1857,13 +1819,6 @@ def SKXWriteResGroup154 : SchedWriteRes<
def: InstRW<[SKXWriteResGroup154], (instrs VPHADDSWYrm,
VPHSUBSWYrm)>;
-def SKXWriteResGroup156 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort06,SKXPort0156]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKXWriteResGroup156], (instrs IMUL32m, MUL32m, MULX32rm)>;
-
def SKXWriteResGroup157 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 10;
let NumMicroOps = 8;
Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=342892&r1=342891&r2=342892&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Mon Sep 24 08:21:57 2018
@@ -113,11 +113,21 @@ defm WriteALU : X86SchedWritePair; //
defm WriteADC : X86SchedWritePair; // Integer ALU + flags op.
def WriteALURMW : WriteSequence<[WriteALULd, WriteStore]>;
def WriteADCRMW : WriteSequence<[WriteADCLd, WriteStore]>;
-defm WriteIMul : X86SchedWritePair; // Integer multiplication.
-defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication.
-def WriteIMulH : SchedWrite; // Integer multiplication, high part.
def WriteLEA : SchedWrite; // LEA instructions can't fold loads.
+// Integer multiplication
+defm WriteIMul8 : X86SchedWritePair; // Integer 8-bit multiplication.
+defm WriteIMul16 : X86SchedWritePair; // Integer 16-bit multiplication.
+defm WriteIMul16Imm : X86SchedWritePair; // Integer 16-bit multiplication by immediate.
+defm WriteIMul16Reg : X86SchedWritePair; // Integer 16-bit multiplication by register.
+defm WriteIMul32 : X86SchedWritePair; // Integer 32-bit multiplication.
+defm WriteIMul32Imm : X86SchedWritePair; // Integer 32-bit multiplication by immediate.
+defm WriteIMul32Reg : X86SchedWritePair; // Integer 32-bit multiplication by register.
+defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication.
+defm WriteIMul64Imm : X86SchedWritePair; // Integer 64-bit multiplication by immediate.
+defm WriteIMul64Reg : X86SchedWritePair; // Integer 64-bit multiplication by register.
+def WriteIMulH : SchedWrite; // Integer multiplication, high part.
+
def WriteBSWAP32 : SchedWrite; // Byte Order (Endianness) 32-bit Swap.
def WriteBSWAP64 : SchedWrite; // Byte Order (Endianness) 64-bit Swap.
defm WriteCMPXCHG : X86SchedWritePair; // Compare and set, compare and swap.
Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=342892&r1=342891&r2=342892&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Mon Sep 24 08:21:57 2018
@@ -78,8 +78,18 @@ def : WriteRes<WriteRMW, [AtomPort0]>;
defm : AtomWriteResPair<WriteALU, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteADC, [AtomPort01], [AtomPort0]>;
-defm : AtomWriteResPair<WriteIMul, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>;
-defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
+
+defm : AtomWriteResPair<WriteIMul8, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>;
+defm : AtomWriteResPair<WriteIMul16, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
+defm : AtomWriteResPair<WriteIMul16Imm, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
+defm : AtomWriteResPair<WriteIMul16Reg, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
+defm : AtomWriteResPair<WriteIMul32, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
+defm : AtomWriteResPair<WriteIMul32Imm, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteIMul32Reg, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
+defm : AtomWriteResPair<WriteIMul64Imm, [AtomPort01], [AtomPort01], 14, 14, [14], [14]>;
+defm : AtomWriteResPair<WriteIMul64Reg, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
+defm : X86WriteResUnsupported<WriteIMulH>;
defm : X86WriteRes<WriteXCHG, [AtomPort01], 2, [2], 1>;
defm : X86WriteRes<WriteBSWAP32, [AtomPort0], 1, [1], 1>;
@@ -113,30 +123,9 @@ def : WriteRes<WriteLAHFSAHF, [AtomPort
}
def : WriteRes<WriteBitTest,[AtomPort01]>;
-defm : X86WriteResUnsupported<WriteIMulH>;
-
// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [AtomPort1]>;
-def AtomWriteIMul16Ld : SchedWriteRes<[AtomPort01]> {
- let Latency = 8;
- let ResourceCycles = [8];
-}
-def : InstRW<[AtomWriteIMul16Ld], (instrs MUL16m, IMUL16m)>;
-
-def AtomWriteIMul32 : SchedWriteRes<[AtomPort01]> {
- let Latency = 6;
- let ResourceCycles = [6];
-}
-def : InstRW<[AtomWriteIMul32], (instrs MUL32r, IMUL32r)>;
-
-def AtomWriteIMul64I : SchedWriteRes<[AtomPort01]> {
- let Latency = 14;
- let ResourceCycles = [14];
-}
-def : InstRW<[AtomWriteIMul64I], (instrs IMUL64rri8, IMUL64rri32,
- IMUL64rmi8, IMUL64rmi32)>;
-
// Bit counts.
defm : AtomWriteResPair<WriteBSF, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
defm : AtomWriteResPair<WriteBSR, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
@@ -505,12 +494,6 @@ def : SchedAlias<WriteADCRMW, AtomWrite0
def : InstRW<[AtomWrite0_1], (instregex "(RCL|RCR|ROL|ROR|SAR|SHL|SHR)(8|16|32|64)m",
"MOV(S|Z)X(32|64)rr(8|8_NOREX|16)")>;
-def AtomWrite0_5 : SchedWriteRes<[AtomPort0]> {
- let Latency = 5;
- let ResourceCycles = [5];
-}
-def : InstRW<[AtomWrite0_5], (instregex "IMUL32(rm|rr)")>;
-
// Port1
def AtomWrite1_1 : SchedWriteRes<[AtomPort1]> {
let Latency = 1;
@@ -621,8 +604,7 @@ def : InstRW<[AtomWrite01_6], (instrs CM
SHLD16rri8, SHRD16rri8,
SHLD16mrCL, SHRD16mrCL,
SHLD16mri8, SHRD16mri8)>;
-def : InstRW<[AtomWrite01_6], (instregex "IMUL16rr",
- "IST_F(P)?(16|32|64)?m",
+def : InstRW<[AtomWrite01_6], (instregex "IST_F(P)?(16|32|64)?m",
"MMX_PH(ADD|SUB)S?Wrm")>;
def AtomWrite01_7 : SchedWriteRes<[AtomPort01]> {
Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=342892&r1=342891&r2=342892&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Mon Sep 24 08:21:57 2018
@@ -164,9 +164,6 @@ def : WriteRes<WriteRMW, [JSAGU]>;
defm : JWriteResIntPair<WriteALU, [JALU01], 1>;
defm : JWriteResIntPair<WriteADC, [JALU01], 1, [2]>;
-defm : JWriteResIntPair<WriteIMul, [JALU1, JMul], 3, [1, 1], 2>; // i8/i16/i32 multiplication
-defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>; // i64 multiplication
-defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>;
defm : X86WriteRes<WriteBSWAP32, [JALU01], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [JALU01], 1, [1], 1>;
@@ -174,6 +171,18 @@ defm : X86WriteRes<WriteCMPXCHG,[JALU01]
defm : X86WriteRes<WriteCMPXCHGRMW,[JALU01, JSAGU, JLAGU], 4, [1, 1, 1], 2>;
defm : X86WriteRes<WriteXCHG, [JALU01], 1, [1], 1>;
+defm : JWriteResIntPair<WriteIMul8, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul16, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul16Imm, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul16Reg, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul32, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul32Imm, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul32Reg, [JALU1, JMul], 3, [1, 1], 2>;
+defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>;
+defm : JWriteResIntPair<WriteIMul64Imm, [JALU1, JMul], 6, [1, 4], 2>;
+defm : JWriteResIntPair<WriteIMul64Reg, [JALU1, JMul], 6, [1, 4], 2>;
+defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>;
+
defm : JWriteResIntPair<WriteDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
defm : JWriteResIntPair<WriteDiv16, [JALU1, JDiv], 17, [1, 17], 2>;
defm : JWriteResIntPair<WriteDiv32, [JALU1, JDiv], 25, [1, 25], 2>;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=342892&r1=342891&r2=342892&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Mon Sep 24 08:21:57 2018
@@ -95,8 +95,17 @@ def : InstRW<[WriteMove], (instrs COPY)>
defm : SLMWriteResPair<WriteALU, [SLM_IEC_RSV01], 1>;
defm : SLMWriteResPair<WriteADC, [SLM_IEC_RSV01], 1>;
-defm : SLMWriteResPair<WriteIMul, [SLM_IEC_RSV1], 3>;
-defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>;
+
+defm : SLMWriteResPair<WriteIMul8, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul16, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul16Imm, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul16Reg, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul32, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul32Imm, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul32Reg, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul64Imm, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul64Reg, [SLM_IEC_RSV1], 3>;
defm : X86WriteRes<WriteBSWAP32, [SLM_IEC_RSV01], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SLM_IEC_RSV01], 1, [1], 1>;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=342892&r1=342891&r2=342892&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Mon Sep 24 08:21:57 2018
@@ -177,8 +177,17 @@ def : WriteRes<WriteZero, []>;
def : WriteRes<WriteLEA, [ZnALU]>;
defm : ZnWriteResPair<WriteALU, [ZnALU], 1>;
defm : ZnWriteResPair<WriteADC, [ZnALU], 1>;
-defm : ZnWriteResPair<WriteIMul, [ZnALU1, ZnMultiplier], 4>;
-defm : ZnWriteResPair<WriteIMul64, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
+
+defm : ZnWriteResPair<WriteIMul8, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul16, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul16Imm, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul16Reg, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul32, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul32Imm, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul32Reg, [ZnALU1, ZnMultiplier], 4>;
+//defm : ZnWriteResPair<WriteIMul64, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
+//defm : ZnWriteResPair<WriteIMul64Imm, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
+//defm : ZnWriteResPair<WriteIMul64Reg, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
defm : X86WriteRes<WriteBSWAP32, [ZnALU], 1, [4], 1>;
defm : X86WriteRes<WriteBSWAP64, [ZnALU], 1, [4], 1>;
@@ -581,45 +590,51 @@ def : InstRW<[WriteALULd],
def ZnWriteMul16 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
let Latency = 3;
}
-def : InstRW<[ZnWriteMul16], (instrs IMUL16r, MUL16r)>;
-def : InstRW<[ZnWriteMul16], (instrs IMUL16rr, IMUL16rri, IMUL16rri8)>; // TODO: is this right?
-def : InstRW<[ZnWriteMul16], (instrs IMUL16rm, IMUL16rmi, IMUL16rmi8)>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul16, ZnWriteMul16>;
+def : SchedAlias<WriteIMul16Imm, ZnWriteMul16>; // TODO: is this right?
+def : SchedAlias<WriteIMul16Reg, ZnWriteMul16>; // TODO: is this right?
+def : SchedAlias<WriteIMul16ImmLd, ZnWriteMul16>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul16RegLd, ZnWriteMul16>; // TODO: this is definitely wrong but matches what the instregex did.
// m16.
def ZnWriteMul16Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
let Latency = 8;
}
-def : InstRW<[ZnWriteMul16Ld, ReadAfterLd], (instrs IMUL16m, MUL16m)>;
+def : SchedAlias<WriteIMul16Ld, ZnWriteMul16Ld>;
// r32.
def ZnWriteMul32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
let Latency = 3;
}
-def : InstRW<[ZnWriteMul32], (instrs IMUL32r, MUL32r)>;
-def : InstRW<[ZnWriteMul32], (instrs IMUL32rr, IMUL32rri, IMUL32rri8)>; // TODO: is this right?
-def : InstRW<[ZnWriteMul32], (instrs IMUL32rm, IMUL32rmi, IMUL32rmi8)>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul32, ZnWriteMul32>;
+def : SchedAlias<WriteIMul32Imm, ZnWriteMul32>; // TODO: is this right?
+def : SchedAlias<WriteIMul32Reg, ZnWriteMul32>; // TODO: is this right?
+def : SchedAlias<WriteIMul32ImmLd, ZnWriteMul32>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul32RegLd, ZnWriteMul32>; // TODO: this is definitely wrong but matches what the instregex did.
// m32.
def ZnWriteMul32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
let Latency = 8;
}
-def : InstRW<[ZnWriteMul32Ld, ReadAfterLd], (instrs IMUL32m, MUL32m)>;
+def : SchedAlias<WriteIMul32Ld, ZnWriteMul32Ld>;
// r64.
def ZnWriteMul64 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
let Latency = 4;
let NumMicroOps = 2;
}
-def : InstRW<[ZnWriteMul64], (instrs IMUL64r, MUL64r)>;
-def : InstRW<[ZnWriteMul64], (instrs IMUL64rr, IMUL64rri8, IMUL64rri32)>; // TODO: is this right?
-def : InstRW<[ZnWriteMul64], (instrs IMUL64rm, IMUL64rmi32, IMUL64rmi8)>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul64, ZnWriteMul64>;
+def : SchedAlias<WriteIMul64Imm, ZnWriteMul64>; // TODO: is this right?
+def : SchedAlias<WriteIMul64Reg, ZnWriteMul64>; // TODO: is this right?
+def : SchedAlias<WriteIMul64ImmLd, ZnWriteMul64>; // TODO: this is definitely wrong but matches what the instregex did.
+def : SchedAlias<WriteIMul64RegLd, ZnWriteMul64>; // TODO: this is definitely wrong but matches what the instregex did.
// m64.
def ZnWriteMul64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
let Latency = 9;
let NumMicroOps = 2;
}
-def : InstRW<[ZnWriteMul64Ld, ReadAfterLd], (instrs IMUL64m, MUL64m)>;
+def : SchedAlias<WriteIMul64Ld, ZnWriteMul64Ld>;
// MULX.
// r32,r32,r32.
Modified: llvm/trunk/test/CodeGen/X86/bmi2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bmi2-schedule.ll?rev=342892&r1=342891&r2=342892&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bmi2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bmi2-schedule.ll Mon Sep 24 08:21:57 2018
@@ -110,8 +110,8 @@ define void @test_mulx_i32(i32 %a0, i32
; GENERIC-LABEL: test_mulx_i32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: #APP
-; GENERIC-NEXT: mulxl %esi, %esi, %edi # sched: [3:1.00]
-; GENERIC-NEXT: mulxl (%rdx), %esi, %edi # sched: [8:1.00]
+; GENERIC-NEXT: mulxl %esi, %esi, %edi # sched: [4:1.00]
+; GENERIC-NEXT: mulxl (%rdx), %esi, %edi # sched: [9:1.00]
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -163,8 +163,8 @@ define i64 @test_mulx_i64(i64 %a0, i64 %
; GENERIC: # %bb.0:
; GENERIC-NEXT: movq %rdx, %rax # sched: [1:0.33]
; GENERIC-NEXT: movq %rdi, %rdx # sched: [1:0.33]
-; GENERIC-NEXT: mulxq %rsi, %rsi, %rcx # sched: [3:1.00]
-; GENERIC-NEXT: mulxq (%rax), %rdx, %rax # sched: [8:1.00]
+; GENERIC-NEXT: mulxq %rsi, %rsi, %rcx # sched: [4:1.00]
+; GENERIC-NEXT: mulxq (%rax), %rdx, %rax # sched: [9:1.00]
; GENERIC-NEXT: orq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
Modified: llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-bmi2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-bmi2.s?rev=342892&r1=342891&r2=342892&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-bmi2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/Broadwell/resources-bmi2.s Mon Sep 24 08:21:57 2018
@@ -62,10 +62,10 @@ shrx %rax, (%rbx), %rcx
# CHECK-NEXT: 2 6 0.50 * bzhil %eax, (%rbx), %ecx
# CHECK-NEXT: 1 1 0.50 bzhiq %rax, %rbx, %rcx
# CHECK-NEXT: 2 6 0.50 * bzhiq %rax, (%rbx), %rcx
-# CHECK-NEXT: 3 4 1.00 mulxl %eax, %ebx, %ecx
-# CHECK-NEXT: 4 9 1.00 * mulxl (%rax), %ebx, %ecx
-# CHECK-NEXT: 2 4 1.00 mulxq %rax, %rbx, %rcx
-# CHECK-NEXT: 3 9 1.00 * mulxq (%rax), %rbx, %rcx
+# CHECK-NEXT: 4 4 1.00 mulxl %eax, %ebx, %ecx
+# CHECK-NEXT: 5 9 1.00 * mulxl (%rax), %ebx, %ecx
+# CHECK-NEXT: 3 4 1.00 mulxq %rax, %rbx, %rcx
+# CHECK-NEXT: 4 9 1.00 * mulxq (%rax), %rbx, %rcx
# CHECK-NEXT: 1 3 1.00 pdepl %eax, %ebx, %ecx
# CHECK-NEXT: 2 8 1.00 * pdepl (%rax), %ebx, %ecx
# CHECK-NEXT: 1 3 1.00 pdepq %rax, %rbx, %rcx
Modified: llvm/trunk/test/tools/llvm-mca/X86/Generic/resources-bmi2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/Generic/resources-bmi2.s?rev=342892&r1=342891&r2=342892&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/Generic/resources-bmi2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/Generic/resources-bmi2.s Mon Sep 24 08:21:57 2018
@@ -62,10 +62,10 @@ shrx %rax, (%rbx), %rcx
# CHECK-NEXT: 2 6 1.00 * bzhil %eax, (%rbx), %ecx
# CHECK-NEXT: 1 1 1.00 bzhiq %rax, %rbx, %rcx
# CHECK-NEXT: 2 6 1.00 * bzhiq %rax, (%rbx), %rcx
-# CHECK-NEXT: 2 3 1.00 mulxl %eax, %ebx, %ecx
-# CHECK-NEXT: 3 8 1.00 * mulxl (%rax), %ebx, %ecx
-# CHECK-NEXT: 2 3 1.00 mulxq %rax, %rbx, %rcx
-# CHECK-NEXT: 3 8 1.00 * mulxq (%rax), %rbx, %rcx
+# CHECK-NEXT: 4 4 1.00 mulxl %eax, %ebx, %ecx
+# CHECK-NEXT: 5 9 1.00 * mulxl (%rax), %ebx, %ecx
+# CHECK-NEXT: 3 4 1.00 mulxq %rax, %rbx, %rcx
+# CHECK-NEXT: 4 9 1.00 * mulxq (%rax), %rbx, %rcx
# CHECK-NEXT: 1 1 0.33 pdepl %eax, %ebx, %ecx
# CHECK-NEXT: 2 6 0.50 * pdepl (%rax), %ebx, %ecx
# CHECK-NEXT: 1 1 0.33 pdepq %rax, %rbx, %rcx
@@ -103,7 +103,7 @@ shrx %rax, (%rbx), %rcx
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
-# CHECK-NEXT: - - 10.67 10.67 - 10.67 8.00 8.00
+# CHECK-NEXT: - - 14.33 11.33 - 12.33 8.00 8.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
@@ -111,10 +111,10 @@ shrx %rax, (%rbx), %rcx
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 bzhil %eax, (%rbx), %ecx
# CHECK-NEXT: - - - 1.00 - - - - bzhiq %rax, %rbx, %rcx
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 bzhiq %rax, (%rbx), %rcx
-# CHECK-NEXT: - - - 1.00 - - - - mulxl %eax, %ebx, %ecx
-# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 mulxl (%rax), %ebx, %ecx
-# CHECK-NEXT: - - - 1.00 - - - - mulxq %rax, %rbx, %rcx
-# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 mulxq (%rax), %rbx, %rcx
+# CHECK-NEXT: - - 0.83 1.33 - 0.83 - - mulxl %eax, %ebx, %ecx
+# CHECK-NEXT: - - 0.83 1.33 - 0.83 0.50 0.50 mulxl (%rax), %ebx, %ecx
+# CHECK-NEXT: - - 1.00 1.00 - - - - mulxq %rax, %rbx, %rcx
+# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 mulxq (%rax), %rbx, %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - pdepl %eax, %ebx, %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 pdepl (%rax), %ebx, %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - pdepq %rax, %rbx, %rcx
Modified: llvm/trunk/test/tools/llvm-mca/X86/Haswell/resources-bmi2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/Haswell/resources-bmi2.s?rev=342892&r1=342891&r2=342892&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/Haswell/resources-bmi2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/Haswell/resources-bmi2.s Mon Sep 24 08:21:57 2018
@@ -62,10 +62,10 @@ shrx %rax, (%rbx), %rcx
# CHECK-NEXT: 2 6 0.50 * bzhil %eax, (%rbx), %ecx
# CHECK-NEXT: 1 1 0.50 bzhiq %rax, %rbx, %rcx
# CHECK-NEXT: 2 6 0.50 * bzhiq %rax, (%rbx), %rcx
-# CHECK-NEXT: 3 4 1.00 mulxl %eax, %ebx, %ecx
-# CHECK-NEXT: 4 9 1.00 * mulxl (%rax), %ebx, %ecx
-# CHECK-NEXT: 2 4 1.00 mulxq %rax, %rbx, %rcx
-# CHECK-NEXT: 3 9 1.00 * mulxq (%rax), %rbx, %rcx
+# CHECK-NEXT: 4 4 1.00 mulxl %eax, %ebx, %ecx
+# CHECK-NEXT: 5 9 1.00 * mulxl (%rax), %ebx, %ecx
+# CHECK-NEXT: 3 4 1.00 mulxq %rax, %rbx, %rcx
+# CHECK-NEXT: 4 9 1.00 * mulxq (%rax), %rbx, %rcx
# CHECK-NEXT: 1 3 1.00 pdepl %eax, %ebx, %ecx
# CHECK-NEXT: 2 8 1.00 * pdepl (%rax), %ebx, %ecx
# CHECK-NEXT: 1 3 1.00 pdepq %rax, %rbx, %rcx
Modified: llvm/trunk/test/tools/llvm-mca/X86/SkylakeClient/resources-bmi2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SkylakeClient/resources-bmi2.s?rev=342892&r1=342891&r2=342892&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SkylakeClient/resources-bmi2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SkylakeClient/resources-bmi2.s Mon Sep 24 08:21:57 2018
@@ -62,10 +62,10 @@ shrx %rax, (%rbx), %rcx
# CHECK-NEXT: 2 6 0.50 * bzhil %eax, (%rbx), %ecx
# CHECK-NEXT: 1 1 0.50 bzhiq %rax, %rbx, %rcx
# CHECK-NEXT: 2 6 0.50 * bzhiq %rax, (%rbx), %rcx
-# CHECK-NEXT: 3 4 1.00 mulxl %eax, %ebx, %ecx
-# CHECK-NEXT: 4 9 1.00 * mulxl (%rax), %ebx, %ecx
-# CHECK-NEXT: 2 4 1.00 mulxq %rax, %rbx, %rcx
-# CHECK-NEXT: 3 9 1.00 * mulxq (%rax), %rbx, %rcx
+# CHECK-NEXT: 4 4 1.00 mulxl %eax, %ebx, %ecx
+# CHECK-NEXT: 5 9 1.00 * mulxl (%rax), %ebx, %ecx
+# CHECK-NEXT: 3 4 1.00 mulxq %rax, %rbx, %rcx
+# CHECK-NEXT: 4 9 1.00 * mulxq (%rax), %rbx, %rcx
# CHECK-NEXT: 1 3 1.00 pdepl %eax, %ebx, %ecx
# CHECK-NEXT: 2 8 1.00 * pdepl (%rax), %ebx, %ecx
# CHECK-NEXT: 1 3 1.00 pdepq %rax, %rbx, %rcx
Modified: llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-bmi2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-bmi2.s?rev=342892&r1=342891&r2=342892&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-bmi2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SkylakeServer/resources-bmi2.s Mon Sep 24 08:21:57 2018
@@ -62,10 +62,10 @@ shrx %rax, (%rbx), %rcx
# CHECK-NEXT: 2 6 0.50 * bzhil %eax, (%rbx), %ecx
# CHECK-NEXT: 1 1 0.50 bzhiq %rax, %rbx, %rcx
# CHECK-NEXT: 2 6 0.50 * bzhiq %rax, (%rbx), %rcx
-# CHECK-NEXT: 3 4 1.00 mulxl %eax, %ebx, %ecx
-# CHECK-NEXT: 4 9 1.00 * mulxl (%rax), %ebx, %ecx
-# CHECK-NEXT: 2 4 1.00 mulxq %rax, %rbx, %rcx
-# CHECK-NEXT: 3 9 1.00 * mulxq (%rax), %rbx, %rcx
+# CHECK-NEXT: 4 4 1.00 mulxl %eax, %ebx, %ecx
+# CHECK-NEXT: 5 9 1.00 * mulxl (%rax), %ebx, %ecx
+# CHECK-NEXT: 3 4 1.00 mulxq %rax, %rbx, %rcx
+# CHECK-NEXT: 4 9 1.00 * mulxq (%rax), %rbx, %rcx
# CHECK-NEXT: 1 3 1.00 pdepl %eax, %ebx, %ecx
# CHECK-NEXT: 2 8 1.00 * pdepl (%rax), %ebx, %ecx
# CHECK-NEXT: 1 3 1.00 pdepq %rax, %rbx, %rcx
More information about the llvm-commits mailing list