[llvm] 35d4292 - [X86][SchedModels] Fix missing ReadAdvance for MULX and ADCX/ADOX (PR51494)

Andrea Di Biagio via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 20 09:40:09 PDT 2021


Author: Andrea Di Biagio
Date: 2021-08-20T17:39:51+01:00
New Revision: 35d4292a734b83e63c19f4f390f47e5a18094204

URL: https://github.com/llvm/llvm-project/commit/35d4292a734b83e63c19f4f390f47e5a18094204
DIFF: https://github.com/llvm/llvm-project/commit/35d4292a734b83e63c19f4f390f47e5a18094204.diff

LOG: [X86][SchedModels] Fix missing ReadAdvance for MULX and ADCX/ADOX (PR51494)

Before this patch, instructions MULX32rm and MULX64rm were missing a ReadAdvance
for the implicit read of register EDX/RDX.  This patch fixes the issue, and it
also introduces a new SchedWrite for the two variants of MULX. The general idea
behind this last change is to eventually decrease the number of InstRW in the
scheduling models.

This patch also adds a ReadAdvance for the implicit read of EFLAGS in ADCX/ADOX.

Differential Revision: https://reviews.llvm.org/D108372

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86InstrArithmetic.td
    llvm/lib/Target/X86/X86SchedBroadwell.td
    llvm/lib/Target/X86/X86SchedHaswell.td
    llvm/lib/Target/X86/X86SchedSandyBridge.td
    llvm/lib/Target/X86/X86SchedSkylakeClient.td
    llvm/lib/Target/X86/X86SchedSkylakeServer.td
    llvm/lib/Target/X86/X86Schedule.td
    llvm/lib/Target/X86/X86ScheduleAtom.td
    llvm/lib/Target/X86/X86ScheduleBdVer2.td
    llvm/lib/Target/X86/X86ScheduleBtVer2.td
    llvm/lib/Target/X86/X86ScheduleSLM.td
    llvm/lib/Target/X86/X86ScheduleZnver1.td
    llvm/lib/Target/X86/X86ScheduleZnver2.td
    llvm/lib/Target/X86/X86ScheduleZnver3.td
    llvm/test/tools/llvm-mca/X86/Haswell/adcx-adox-read-advance.s
    llvm/test/tools/llvm-mca/X86/Haswell/mulx-read-advance.s
    llvm/test/tools/llvm-mca/X86/Znver1/resources-bmi2.s
    llvm/test/tools/llvm-mca/X86/Znver2/adcx-adox-read-advance.s
    llvm/test/tools/llvm-mca/X86/Znver2/mulx-read-advance.s
    llvm/test/tools/llvm-mca/X86/Znver2/resources-bmi2.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index ba00e7da81f9..afbac2109414 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -1502,8 +1502,12 @@ let hasSideEffects = 0 in {
   let mayLoad = 1 in
   def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
              !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
-
-             []>, T8XD, VEX_4V, Sched<[sched.Folded, WriteIMulH]>;
+             []>, T8XD, VEX_4V,
+             Sched<[sched.Folded, WriteIMulH,
+                    // Memory operand.
+                    ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+                    // Implicit read of EDX/RDX
+                    sched.ReadAfterFold]>;
 
   // Pseudo instructions to be used when the low result isn't used. The
   // instruction is defined to keep the high if both destinations are the same.
@@ -1518,9 +1522,9 @@ let hasSideEffects = 0 in {
 
 let Predicates = [HasBMI2] in {
   let Uses = [EDX] in
-    defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteIMul32>;
+    defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteMULX32>;
   let Uses = [RDX] in
-    defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteIMul64>, VEX_W;
+    defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteMULX64>, VEX_W;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1547,7 +1551,12 @@ let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS],
                     "adox{q}\t{$src2, $dst|$dst, $src2}", []>, T8XS;
   } // SchedRW
 
-  let mayLoad = 1, SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold] in {
+  let mayLoad = 1,
+      SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold,
+                 // Memory operand.
+                 ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+                 // Implicit read of EFLAGS
+                 WriteADC.ReadAfterFold] in {
   def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst),
                    (ins GR32:$src1, i32mem:$src2),
                    "adcx{l}\t{$src2, $dst|$dst, $src2}", []>, T8PD;

diff  --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index d2ced1c67407..de8b40a69b2f 100644
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -123,9 +123,11 @@ defm : X86WriteRes<WriteIMul16Imm,    [BWPort1,BWPort0156], 4, [1,1], 2>;
 defm : X86WriteRes<WriteIMul16ImmLd,  [BWPort1,BWPort0156,BWPort23], 8, [1,1,1], 3>;
 defm : BWWriteResPair<WriteIMul16Reg, [BWPort1],   3>;
 defm : BWWriteResPair<WriteIMul32,    [BWPort1,BWPort06,BWPort0156], 4, [1,1,1], 3>;
+defm : BWWriteResPair<WriteMULX32,    [BWPort1,BWPort06,BWPort0156], 4, [1,1,1], 3>;
 defm : BWWriteResPair<WriteIMul32Imm, [BWPort1],   3>;
 defm : BWWriteResPair<WriteIMul32Reg, [BWPort1],   3>;
 defm : BWWriteResPair<WriteIMul64,    [BWPort1,BWPort5], 4, [1,1], 2>;
+defm : BWWriteResPair<WriteMULX64,    [BWPort1,BWPort5], 4, [1,1], 2>;
 defm : BWWriteResPair<WriteIMul64Imm, [BWPort1],   3>;
 defm : BWWriteResPair<WriteIMul64Reg, [BWPort1],   3>;
 def : WriteRes<WriteIMulH, []> { let Latency = 3; }

diff  --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index 99fddcd4b2d5..e9922ceab69b 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -140,9 +140,11 @@ defm : X86WriteRes<WriteIMul16Imm,    [HWPort1,HWPort0156], 4, [1,1], 2>;
 defm : X86WriteRes<WriteIMul16ImmLd,  [HWPort1,HWPort0156,HWPort23], 8, [1,1,1], 3>;
 defm : HWWriteResPair<WriteIMul16Reg, [HWPort1],   3>;
 defm : HWWriteResPair<WriteIMul32,    [HWPort1,HWPort06,HWPort0156], 4, [1,1,1], 3>;
+defm : HWWriteResPair<WriteMULX32,    [HWPort1,HWPort06,HWPort0156], 4, [1,1,1], 3>;
 defm : HWWriteResPair<WriteIMul32Imm, [HWPort1],   3>;
 defm : HWWriteResPair<WriteIMul32Reg, [HWPort1],   3>;
 defm : HWWriteResPair<WriteIMul64,    [HWPort1,HWPort6], 4, [1,1], 2>;
+defm : HWWriteResPair<WriteMULX64,    [HWPort1,HWPort6], 4, [1,1], 2>;
 defm : HWWriteResPair<WriteIMul64Imm, [HWPort1],   3>;
 defm : HWWriteResPair<WriteIMul64Reg, [HWPort1],   3>;
 def  : WriteRes<WriteIMulH, []> { let Latency = 3; }

diff  --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 2f7157f43268..d6d8fc6c74d0 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -124,9 +124,11 @@ defm : X86WriteRes<WriteIMul16Imm,    [SBPort1,SBPort015], 4, [1,1], 2>;
 defm : X86WriteRes<WriteIMul16ImmLd,  [SBPort1,SBPort015,SBPort23], 8, [1,1,1], 3>;
 defm : SBWriteResPair<WriteIMul16Reg, [SBPort1],   3>;
 defm : SBWriteResPair<WriteIMul32,    [SBPort1,SBPort05,SBPort015], 4, [1,1,1], 3>;
+defm : SBWriteResPair<WriteMULX32,    [SBPort1,SBPort05,SBPort015], 4, [1,1,1], 3>;
 defm : SBWriteResPair<WriteIMul32Imm, [SBPort1],   3>;
 defm : SBWriteResPair<WriteIMul32Reg, [SBPort1],   3>;
 defm : SBWriteResPair<WriteIMul64,    [SBPort1,SBPort0], 4, [1,1], 2>;
+defm : SBWriteResPair<WriteMULX64,    [SBPort1,SBPort0], 4, [1,1], 2>;
 defm : SBWriteResPair<WriteIMul64Imm, [SBPort1],   3>;
 defm : SBWriteResPair<WriteIMul64Reg, [SBPort1],   3>;
 def  : WriteRes<WriteIMulH, []> { let Latency = 3; }

diff  --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 8486bdda0349..b0d586c2b463 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -122,9 +122,11 @@ defm : X86WriteRes<WriteIMul16Imm,     [SKLPort1,SKLPort0156], 4, [1,1], 2>;
 defm : X86WriteRes<WriteIMul16ImmLd,   [SKLPort1,SKLPort0156,SKLPort23], 8, [1,1,1], 3>;
 defm : SKLWriteResPair<WriteIMul16Reg, [SKLPort1],   3>;
 defm : SKLWriteResPair<WriteIMul32,    [SKLPort1,SKLPort06,SKLPort0156], 4, [1,1,1], 3>;
+defm : SKLWriteResPair<WriteMULX32,    [SKLPort1,SKLPort06,SKLPort0156], 4, [1,1,1], 3>;
 defm : SKLWriteResPair<WriteIMul32Imm, [SKLPort1],   3>;
 defm : SKLWriteResPair<WriteIMul32Reg, [SKLPort1],   3>;
 defm : SKLWriteResPair<WriteIMul64,    [SKLPort1,SKLPort5], 4, [1,1], 2>;
+defm : SKLWriteResPair<WriteMULX64,    [SKLPort1,SKLPort5], 4, [1,1], 2>;
 defm : SKLWriteResPair<WriteIMul64Imm, [SKLPort1],   3>;
 defm : SKLWriteResPair<WriteIMul64Reg, [SKLPort1],   3>;
 def : WriteRes<WriteIMulH, []> { let Latency = 3; }

diff  --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index ba80d47c4eb6..f2f72da8a7ef 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -123,9 +123,11 @@ defm : X86WriteRes<WriteIMul16ImmLd,   [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1
 defm : X86WriteRes<WriteIMul16Reg,     [SKXPort1],   3, [1], 1>;
 defm : X86WriteRes<WriteIMul16RegLd,   [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1,1], 3>;
 defm : SKXWriteResPair<WriteIMul32,    [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,1], 3>;
+defm : SKXWriteResPair<WriteMULX32,    [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,1], 3>;
 defm : SKXWriteResPair<WriteIMul32Imm, [SKXPort1],   3>;
 defm : SKXWriteResPair<WriteIMul32Reg, [SKXPort1],   3>;
 defm : SKXWriteResPair<WriteIMul64,    [SKXPort1,SKXPort5], 4, [1,1], 2>;
+defm : SKXWriteResPair<WriteMULX64,    [SKXPort1,SKXPort5], 4, [1,1], 2>;
 defm : SKXWriteResPair<WriteIMul64Imm, [SKXPort1],   3>;
 defm : SKXWriteResPair<WriteIMul64Reg, [SKXPort1],   3>;
 def : WriteRes<WriteIMulH, []> { let Latency = 3; }

diff  --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td
index ae1c7be86624..bf654f3be319 100644
--- a/llvm/lib/Target/X86/X86Schedule.td
+++ b/llvm/lib/Target/X86/X86Schedule.td
@@ -148,7 +148,9 @@ defm WriteIMul32Reg : X86SchedWritePair; // Integer 32-bit multiplication by reg
 defm WriteIMul64    : X86SchedWritePair; // Integer 64-bit multiplication.
 defm WriteIMul64Imm : X86SchedWritePair; // Integer 64-bit multiplication by immediate.
 defm WriteIMul64Reg : X86SchedWritePair; // Integer 64-bit multiplication by register.
-def  WriteIMulH     : SchedWrite;        // Integer multiplication, high part.
+defm WriteMULX32    : X86SchedWritePair; // Integer 32-bit Multiplication without affecting flags.
+defm WriteMULX64    : X86SchedWritePair; // Integer 64-bit Multiplication without affecting flags.
+def  WriteIMulH     : SchedWrite;        // Integer multiplication, high part (only used by MULX).
 
 def  WriteBSWAP32 : SchedWrite; // Byte Order (Endianness) 32-bit Swap.
 def  WriteBSWAP64 : SchedWrite; // Byte Order (Endianness) 64-bit Swap.

diff  --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td
index d00c2e3718d3..1412649551c4 100644
--- a/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -91,6 +91,8 @@ defm : AtomWriteResPair<WriteIMul64,    [AtomPort01], [AtomPort01], 12, 12, [12]
 defm : AtomWriteResPair<WriteIMul64Imm, [AtomPort01], [AtomPort01], 14, 14, [14], [14]>;
 defm : AtomWriteResPair<WriteIMul64Reg, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
 defm : X86WriteResUnsupported<WriteIMulH>;
+defm : X86WriteResPairUnsupported<WriteMULX32>;
+defm : X86WriteResPairUnsupported<WriteMULX64>;
 
 defm : X86WriteRes<WriteXCHG,        [AtomPort01], 2, [2], 1>;
 defm : X86WriteRes<WriteBSWAP32,     [AtomPort0], 1, [1], 1>;

diff  --git a/llvm/lib/Target/X86/X86ScheduleBdVer2.td b/llvm/lib/Target/X86/X86ScheduleBdVer2.td
index 99d4011dae77..3012fd4d62ff 100644
--- a/llvm/lib/Target/X86/X86ScheduleBdVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBdVer2.td
@@ -435,7 +435,11 @@ defm : PdWriteResExPair<WriteIMul32Reg, [PdEX1, PdMul],          4,  [1, 2]>;
 defm : PdWriteResExPair<WriteIMul64,    [PdEX1, PdMul],          6,  [1, 6]>;
 defm : PdWriteResExPair<WriteIMul64Imm, [PdEX1, PdMul],          6,  [1, 4],1, 1>;
 defm : PdWriteResExPair<WriteIMul64Reg, [PdEX1, PdMul],          6,  [1, 4]>;
-defm : X86WriteResUnsupported<WriteIMulH>; // BMI2 MULX
+
+// BMI2 MULX
+defm : X86WriteResUnsupported<WriteIMulH>;
+defm : X86WriteResPairUnsupported<WriteMULX32>;
+defm : X86WriteResPairUnsupported<WriteMULX64>;
 
 defm : PdWriteResExPair<WriteDiv8,    [PdEX1, PdDiv],           12,  [1, 12]>;
 defm : PdWriteResExPair<WriteDiv16,   [PdEX1, PdDiv],           15,  [1, 15],   2>;

diff  --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index cdd03830bcad..530429688709 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -209,7 +209,9 @@ defm : JWriteResIntPair<WriteIMul32Reg, [JALU1, JMul], 3, [1, 1], 1>;
 defm : JWriteResIntPair<WriteIMul64,    [JALU1, JMul], 6, [1, 4], 2>;  
 defm : JWriteResIntPair<WriteIMul64Imm, [JALU1, JMul], 6, [1, 4], 1>;
 defm : JWriteResIntPair<WriteIMul64Reg, [JALU1, JMul], 6, [1, 4], 1>;
-defm : X86WriteRes<WriteIMulH,          [JALU1], 6, [4], 1>;
+defm : X86WriteResUnsupported<WriteIMulH>;
+defm : X86WriteResPairUnsupported<WriteMULX32>;
+defm : X86WriteResPairUnsupported<WriteMULX64>;
 
 defm : JWriteResIntPair<WriteDiv8,   [JALU1, JDiv], 12, [1, 12], 1>;
 defm : JWriteResIntPair<WriteDiv16,  [JALU1, JDiv], 17, [1, 17], 2>;

diff  --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index 123844a73a59..d2526472ed5e 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -111,7 +111,9 @@ defm : SLMWriteResPair<WriteIMul32Reg, [SLM_IEC_RSV1],  3>;
 defm : SLMWriteResPair<WriteIMul64,    [SLM_IEC_RSV1],  3>;
 defm : SLMWriteResPair<WriteIMul64Imm, [SLM_IEC_RSV1],  3>;
 defm : SLMWriteResPair<WriteIMul64Reg, [SLM_IEC_RSV1],  3>;
-def  : WriteRes<WriteIMulH, [SLM_FPC_RSV0]>;
+defm : X86WriteResUnsupported<WriteIMulH>;
+defm : X86WriteResPairUnsupported<WriteMULX32>;
+defm : X86WriteResPairUnsupported<WriteMULX64>;
 
 defm : X86WriteRes<WriteBSWAP32, [SLM_IEC_RSV01], 1, [1], 1>;
 defm : X86WriteRes<WriteBSWAP64, [SLM_IEC_RSV01], 1, [1], 1>;

diff  --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 12f8e7cc76f7..3a1c27fda704 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -256,8 +256,9 @@ defm : ZnWriteResPair<WriteIDiv32, [ZnALU2, ZnDivider], 25, [1,25], 2>;
 defm : ZnWriteResPair<WriteIDiv64, [ZnALU2, ZnDivider], 41, [1,41], 2>;
 
 // IMULH
-def  : WriteRes<WriteIMulH, [ZnALU1, ZnMultiplier]>{
-  let Latency = 4;
+def  : WriteRes<WriteIMulH, [ZnMultiplier]>{
+  let Latency = 3;
+  let NumMicroOps = 0;
 }
 
 // Floating point operations
@@ -659,32 +660,10 @@ def ZnWriteMul64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
 }
 def : SchedAlias<WriteIMul64Ld, ZnWriteMul64Ld>;
 
-// MULX.
-// r32,r32,r32.
-def ZnWriteMulX32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
-  let Latency = 3;
-  let ResourceCycles = [1, 2];
-}
-def : InstRW<[ZnWriteMulX32], (instrs MULX32rr)>;
-
-// r32,r32,m32.
-def ZnWriteMulX32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
-  let Latency = 8;
-  let ResourceCycles = [1, 2, 2];
-}
-def : InstRW<[ZnWriteMulX32Ld, ReadAfterLd], (instrs MULX32rm)>;
-
-// r64,r64,r64.
-def ZnWriteMulX64 : SchedWriteRes<[ZnALU1]> {
-  let Latency = 3;
-}
-def : InstRW<[ZnWriteMulX64], (instrs MULX64rr)>;
-
-// r64,r64,m64.
-def ZnWriteMulX64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
-  let Latency = 8;
-}
-def : InstRW<[ZnWriteMulX64Ld, ReadAfterLd], (instrs MULX64rm)>;
+// MULX
+// Numbers are based on the AMD SOG for Family 17h - Instruction Latencies.
+defm : ZnWriteResPair<WriteMULX32, [ZnALU1, ZnMultiplier], 3, [1, 1], 1, 5, 0>;
+defm : ZnWriteResPair<WriteMULX64, [ZnALU1, ZnMultiplier], 3, [1, 1], 1, 5, 0>;
 
 //-- Control transfer instructions --//
 

diff  --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td
index 5b4b151d2938..75b9bce4507e 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver2.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td
@@ -243,8 +243,9 @@ defm : Zn2WriteResPair<WriteIDiv32, [Zn2ALU2, Zn2Divider], 25, [1,25], 2>;
 defm : Zn2WriteResPair<WriteIDiv64, [Zn2ALU2, Zn2Divider], 41, [1,41], 2>;
 
 // IMULH
-def  : WriteRes<WriteIMulH, [Zn2ALU1, Zn2Multiplier]>{
-  let Latency = 4;
+def  : WriteRes<WriteIMulH, [Zn2Multiplier]>{
+  let Latency = 3;
+  let NumMicroOps = 0;
 }
 
 // Floating point operations
@@ -658,31 +659,9 @@ def : SchedAlias<WriteIMul64ImmLd, Zn2WriteMul64Ld>;
 def : SchedAlias<WriteIMul64RegLd, Zn2WriteMul64Ld>;
 
 // MULX.
-// r32,r32,r32.
-def Zn2WriteMulX32 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> {
-  let Latency = 3;
-  let ResourceCycles = [1, 2];
-}
-def : InstRW<[Zn2WriteMulX32], (instrs MULX32rr)>;
-
-// r32,r32,m32.
-def Zn2WriteMulX32Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
-  let Latency = 7;
-  let ResourceCycles = [1, 2, 2];
-}
-def : InstRW<[Zn2WriteMulX32Ld, ReadAfterLd], (instrs MULX32rm)>;
-
-// r64,r64,r64.
-def Zn2WriteMulX64 : SchedWriteRes<[Zn2ALU1]> {
-  let Latency = 3;
-}
-def : InstRW<[Zn2WriteMulX64], (instrs MULX64rr)>;
-
-// r64,r64,m64.
-def Zn2WriteMulX64Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
-  let Latency = 7;
-}
-def : InstRW<[Zn2WriteMulX64Ld, ReadAfterLd], (instrs MULX64rm)>;
+// Numbers are based on the AMD SOG for Family 17h - Instruction Latencies.
+defm : Zn2WriteResPair<WriteMULX32, [Zn2ALU1, Zn2Multiplier], 3, [1, 1], 1, 4, 0>;
+defm : Zn2WriteResPair<WriteMULX64, [Zn2ALU1, Zn2Multiplier], 3, [1, 1], 1, 4, 0>;
 
 //-- Control transfer instructions --//
 

diff  --git a/llvm/lib/Target/X86/X86ScheduleZnver3.td b/llvm/lib/Target/X86/X86ScheduleZnver3.td
index 4a91a91a0f0f..ec4ae59aa074 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver3.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver3.td
@@ -617,6 +617,7 @@ defm : Zn3WriteResIntPair<WriteIMul16, [Zn3Multiplier], 3, [3], 3, /*LoadUOps=*/
 defm : Zn3WriteResIntPair<WriteIMul16Imm, [Zn3Multiplier], 4, [4], 2>; // Integer 16-bit multiplication by immediate.
 defm : Zn3WriteResIntPair<WriteIMul16Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 16-bit multiplication by register.
 defm : Zn3WriteResIntPair<WriteIMul32, [Zn3Multiplier], 3, [3], 2>;    // Integer 32-bit multiplication.
+defm : Zn3WriteResIntPair<WriteMULX32, [Zn3Multiplier], 4, [1], 2>;    // Integer 32-bit Unsigned Multiply Without Affecting Flags.
 
 def Zn3MULX32rr : SchedWriteRes<[Zn3Multiplier]> {
   let Latency = 4;
@@ -630,11 +631,14 @@ def Zn3MULX32rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3Multiplier]> {
   let ResourceCycles = [1, 1, 2];
   let NumMicroOps = Zn3MULX32rr.NumMicroOps;
 }
-def : InstRW<[Zn3MULX32rm, WriteIMulH], (instrs MULX32rm)>;
+def : InstRW<[Zn3MULX32rm, WriteIMulH,
+              ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+              ReadAfterLd], (instrs MULX32rm)>;
 
 defm : Zn3WriteResIntPair<WriteIMul32Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by immediate.
 defm : Zn3WriteResIntPair<WriteIMul32Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by register.
 defm : Zn3WriteResIntPair<WriteIMul64, [Zn3Multiplier], 3, [3], 2>;    // Integer 64-bit multiplication.
+defm : Zn3WriteResIntPair<WriteMULX64, [Zn3Multiplier], 4, [1], 2>;    // Integer 64-bit Unsigned Multiply Without Affecting Flags.
 
 def Zn3MULX64rr : SchedWriteRes<[Zn3Multiplier]> {
   let Latency = 4;
@@ -648,7 +652,9 @@ def Zn3MULX64rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3Multiplier]> {
   let ResourceCycles = [1, 1, 2];
   let NumMicroOps = Zn3MULX64rr.NumMicroOps;
 }
-def : InstRW<[Zn3MULX64rm, WriteIMulH], (instrs MULX64rm)>;
+def : InstRW<[Zn3MULX64rm, WriteIMulH,
+              ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+              ReadAfterLd], (instrs MULX64rm)>;
 
 defm : Zn3WriteResIntPair<WriteIMul64Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by immediate.
 defm : Zn3WriteResIntPair<WriteIMul64Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by register.

diff  --git a/llvm/test/tools/llvm-mca/X86/Haswell/adcx-adox-read-advance.s b/llvm/test/tools/llvm-mca/X86/Haswell/adcx-adox-read-advance.s
index c101d824d86b..d418b6176c99 100644
--- a/llvm/test/tools/llvm-mca/X86/Haswell/adcx-adox-read-advance.s
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/adcx-adox-read-advance.s
@@ -15,12 +15,12 @@ adox (%rdi), %rcx
 
 # CHECK:      Iterations:        2
 # CHECK-NEXT: Instructions:      2
-# CHECK-NEXT: Total Cycles:      17
+# CHECK-NEXT: Total Cycles:      12
 # CHECK-NEXT: Total uOps:        6
 
 # CHECK:      Dispatch Width:    4
-# CHECK-NEXT: uOps Per Cycle:    0.35
-# CHECK-NEXT: IPC:               0.12
+# CHECK-NEXT: uOps Per Cycle:    0.50
+# CHECK-NEXT: IPC:               0.17
 # CHECK-NEXT: Block RThroughput: 0.8
 
 # CHECK:      Instruction Info:
@@ -55,11 +55,11 @@ adox (%rdi), %rcx
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -     0.50   0.50    -     adcxq	(%rdi), %rcx
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     0123456
+# CHECK-NEXT:                     01
 # CHECK-NEXT: Index     0123456789
 
-# CHECK:      [0,0]     DeeeeeeeER.    ..   adcxq	(%rdi), %rcx
-# CHECK-NEXT: [1,0]     .D======eeeeeeeER   adcxq	(%rdi), %rcx
+# CHECK:      [0,0]     DeeeeeeeER..   adcxq	(%rdi), %rcx
+# CHECK-NEXT: [1,0]     .D=eeeeeeeER   adcxq	(%rdi), %rcx
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -68,18 +68,18 @@ adox (%rdi), %rcx
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     2     4.0    0.5    0.0       adcxq	(%rdi), %rcx
+# CHECK-NEXT: 0.     2     1.5    0.5    0.0       adcxq	(%rdi), %rcx
 
 # CHECK:      [1] Code Region
 
 # CHECK:      Iterations:        2
 # CHECK-NEXT: Instructions:      2
-# CHECK-NEXT: Total Cycles:      17
+# CHECK-NEXT: Total Cycles:      12
 # CHECK-NEXT: Total uOps:        6
 
 # CHECK:      Dispatch Width:    4
-# CHECK-NEXT: uOps Per Cycle:    0.35
-# CHECK-NEXT: IPC:               0.12
+# CHECK-NEXT: uOps Per Cycle:    0.50
+# CHECK-NEXT: IPC:               0.17
 # CHECK-NEXT: Block RThroughput: 0.8
 
 # CHECK:      Instruction Info:
@@ -114,11 +114,11 @@ adox (%rdi), %rcx
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -     0.50   0.50    -     adoxq	(%rdi), %rcx
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     0123456
+# CHECK-NEXT:                     01
 # CHECK-NEXT: Index     0123456789
 
-# CHECK:      [0,0]     DeeeeeeeER.    ..   adoxq	(%rdi), %rcx
-# CHECK-NEXT: [1,0]     .D======eeeeeeeER   adoxq	(%rdi), %rcx
+# CHECK:      [0,0]     DeeeeeeeER..   adoxq	(%rdi), %rcx
+# CHECK-NEXT: [1,0]     .D=eeeeeeeER   adoxq	(%rdi), %rcx
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -127,4 +127,4 @@ adox (%rdi), %rcx
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     2     4.0    0.5    0.0       adoxq	(%rdi), %rcx
+# CHECK-NEXT: 0.     2     1.5    0.5    0.0       adoxq	(%rdi), %rcx

diff  --git a/llvm/test/tools/llvm-mca/X86/Haswell/mulx-read-advance.s b/llvm/test/tools/llvm-mca/X86/Haswell/mulx-read-advance.s
index b781ebdef07f..8203ce27c44a 100644
--- a/llvm/test/tools/llvm-mca/X86/Haswell/mulx-read-advance.s
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/mulx-read-advance.s
@@ -15,12 +15,12 @@ mulxq (%rdi), %rax, %rdx
 
 # CHECK:      Iterations:        2
 # CHECK-NEXT: Instructions:      2
-# CHECK-NEXT: Total Cycles:      21
+# CHECK-NEXT: Total Cycles:      16
 # CHECK-NEXT: Total uOps:        10
 
 # CHECK:      Dispatch Width:    4
-# CHECK-NEXT: uOps Per Cycle:    0.48
-# CHECK-NEXT: IPC:               0.10
+# CHECK-NEXT: uOps Per Cycle:    0.63
+# CHECK-NEXT: IPC:               0.13
 # CHECK-NEXT: Block RThroughput: 1.3
 
 # CHECK:      Instruction Info:
@@ -55,11 +55,11 @@ mulxq (%rdi), %rax, %rdx
 # CHECK-NEXT:  -      -     0.50   1.00   0.50   0.50    -     0.50   1.00    -     mulxl	(%rdi), %eax, %edx
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     0123456789
-# CHECK-NEXT: Index     0123456789          0
+# CHECK-NEXT:                     012345
+# CHECK-NEXT: Index     0123456789
 
-# CHECK:      [0,0]     DeeeeeeeeeER   .    .   mulxl	(%rdi), %eax, %edx
-# CHECK-NEXT: [1,0]     . D=======eeeeeeeeeER   mulxl	(%rdi), %eax, %edx
+# CHECK:      [0,0]     DeeeeeeeeeER   .   mulxl	(%rdi), %eax, %edx
+# CHECK-NEXT: [1,0]     . D==eeeeeeeeeER   mulxl	(%rdi), %eax, %edx
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -68,18 +68,18 @@ mulxq (%rdi), %rax, %rdx
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     2     4.5    0.5    0.0       mulxl	(%rdi), %eax, %edx
+# CHECK-NEXT: 0.     2     2.0    0.5    0.0       mulxl	(%rdi), %eax, %edx
 
 # CHECK:      [1] Code Region
 
 # CHECK:      Iterations:        2
 # CHECK-NEXT: Instructions:      2
-# CHECK-NEXT: Total Cycles:      21
+# CHECK-NEXT: Total Cycles:      16
 # CHECK-NEXT: Total uOps:        8
 
 # CHECK:      Dispatch Width:    4
-# CHECK-NEXT: uOps Per Cycle:    0.38
-# CHECK-NEXT: IPC:               0.10
+# CHECK-NEXT: uOps Per Cycle:    0.50
+# CHECK-NEXT: IPC:               0.13
 # CHECK-NEXT: Block RThroughput: 1.0
 
 # CHECK:      Instruction Info:
@@ -114,11 +114,11 @@ mulxq (%rdi), %rax, %rdx
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -     1.00    -     mulxq	(%rdi), %rax, %rdx
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     0123456789
-# CHECK-NEXT: Index     0123456789          0
+# CHECK-NEXT:                     012345
+# CHECK-NEXT: Index     0123456789
 
-# CHECK:      [0,0]     DeeeeeeeeeER   .    .   mulxq	(%rdi), %rax, %rdx
-# CHECK-NEXT: [1,0]     .D========eeeeeeeeeER   mulxq	(%rdi), %rax, %rdx
+# CHECK:      [0,0]     DeeeeeeeeeER   .   mulxq	(%rdi), %rax, %rdx
+# CHECK-NEXT: [1,0]     .D===eeeeeeeeeER   mulxq	(%rdi), %rax, %rdx
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -127,4 +127,4 @@ mulxq (%rdi), %rax, %rdx
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     2     5.0    0.5    0.0       mulxq	(%rdi), %rax, %rdx
+# CHECK-NEXT: 0.     2     2.5    0.5    0.0       mulxq	(%rdi), %rax, %rdx

diff  --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-bmi2.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-bmi2.s
index 68a2a4b9ec7e..32785f92675d 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-bmi2.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-bmi2.s
@@ -64,8 +64,8 @@ shrx        %rax, (%rbx), %rcx
 # CHECK-NEXT:  2      5     0.50    *                   bzhiq	%rax, (%rbx), %rcx
 # CHECK-NEXT:  1      3     2.00                        mulxl	%eax, %ebx, %ecx
 # CHECK-NEXT:  1      8     2.00    *                   mulxl	(%rax), %ebx, %ecx
-# CHECK-NEXT:  1      3     1.00                        mulxq	%rax, %rbx, %rcx
-# CHECK-NEXT:  1      8     1.00    *                   mulxq	(%rax), %rbx, %rcx
+# CHECK-NEXT:  1      3     2.00                        mulxq	%rax, %rbx, %rcx
+# CHECK-NEXT:  1      8     2.00    *                   mulxq	(%rax), %rbx, %rcx
 # CHECK-NEXT:  1      100   0.25                        pdepl	%eax, %ebx, %ecx
 # CHECK-NEXT:  1      100   0.25    *                   pdepl	(%rax), %ebx, %ecx
 # CHECK-NEXT:  1      100   0.25                        pdepq	%rax, %rbx, %rcx
@@ -107,7 +107,7 @@ shrx        %rax, (%rbx), %rcx
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT: 6.00   6.00   5.00   10.00  5.00   5.00    -      -      -      -      -     5.00
+# CHECK-NEXT: 6.00   6.00   5.00   9.00   5.00   5.00    -      -      -      -      -     8.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -116,9 +116,9 @@ shrx        %rax, (%rbx), %rcx
 # CHECK-NEXT:  -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -     bzhiq	%rax, %rbx, %rcx
 # CHECK-NEXT: 0.50   0.50   0.25   0.25   0.25   0.25    -      -      -      -      -      -     bzhiq	%rax, (%rbx), %rcx
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -     2.00   mulxl	%eax, %ebx, %ecx
-# CHECK-NEXT: 0.50   0.50    -     2.00    -      -      -      -      -      -      -     2.00   mulxl	(%rax), %ebx, %ecx
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -      -     mulxq	%rax, %rbx, %rcx
-# CHECK-NEXT: 0.50   0.50    -     1.00    -      -      -      -      -      -      -     1.00   mulxq	(%rax), %rbx, %rcx
+# CHECK-NEXT: 0.50   0.50    -     1.00    -      -      -      -      -      -      -     2.00   mulxl	(%rax), %ebx, %ecx
+# CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -      -     2.00   mulxq	%rax, %rbx, %rcx
+# CHECK-NEXT: 0.50   0.50    -     1.00    -      -      -      -      -      -      -     2.00   mulxq	(%rax), %rbx, %rcx
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     pdepl	%eax, %ebx, %ecx
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     pdepl	(%rax), %ebx, %ecx
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     pdepq	%rax, %rbx, %rcx

diff  --git a/llvm/test/tools/llvm-mca/X86/Znver2/adcx-adox-read-advance.s b/llvm/test/tools/llvm-mca/X86/Znver2/adcx-adox-read-advance.s
index 9f1b4c68dc1a..3b2360e3237f 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver2/adcx-adox-read-advance.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver2/adcx-adox-read-advance.s
@@ -15,12 +15,12 @@ adox (%rdi), %rcx
 
 # CHECK:      Iterations:        2
 # CHECK-NEXT: Instructions:      2
-# CHECK-NEXT: Total Cycles:      13
+# CHECK-NEXT: Total Cycles:      9
 # CHECK-NEXT: Total uOps:        4
 
 # CHECK:      Dispatch Width:    4
-# CHECK-NEXT: uOps Per Cycle:    0.31
-# CHECK-NEXT: IPC:               0.15
+# CHECK-NEXT: uOps Per Cycle:    0.44
+# CHECK-NEXT: IPC:               0.22
 # CHECK-NEXT: Block RThroughput: 0.5
 
 # CHECK:      Instruction Info:
@@ -58,11 +58,10 @@ adox (%rdi), %rcx
 # CHECK-NEXT:  -     0.50   0.50    -      -     0.50   0.50    -      -      -      -      -      -     adcxq	(%rdi), %rcx
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     012
-# CHECK-NEXT: Index     0123456789
+# CHECK-NEXT: Index     012345678
 
-# CHECK:      [0,0]     DeeeeeER  . .   adcxq	(%rdi), %rcx
-# CHECK-NEXT: [1,0]     D=====eeeeeER   adcxq	(%rdi), %rcx
+# CHECK:      [0,0]     DeeeeeER.   adcxq	(%rdi), %rcx
+# CHECK-NEXT: [1,0]     D=eeeeeER   adcxq	(%rdi), %rcx
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -71,18 +70,18 @@ adox (%rdi), %rcx
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     2     3.5    0.5    0.0       adcxq	(%rdi), %rcx
+# CHECK-NEXT: 0.     2     1.5    0.5    0.0       adcxq	(%rdi), %rcx
 
 # CHECK:      [1] Code Region
 
 # CHECK:      Iterations:        2
 # CHECK-NEXT: Instructions:      2
-# CHECK-NEXT: Total Cycles:      13
+# CHECK-NEXT: Total Cycles:      9
 # CHECK-NEXT: Total uOps:        4
 
 # CHECK:      Dispatch Width:    4
-# CHECK-NEXT: uOps Per Cycle:    0.31
-# CHECK-NEXT: IPC:               0.15
+# CHECK-NEXT: uOps Per Cycle:    0.44
+# CHECK-NEXT: IPC:               0.22
 # CHECK-NEXT: Block RThroughput: 0.5
 
 # CHECK:      Instruction Info:
@@ -120,11 +119,10 @@ adox (%rdi), %rcx
 # CHECK-NEXT:  -     0.50   0.50    -      -     0.50   0.50    -      -      -      -      -      -     adoxq	(%rdi), %rcx
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     012
-# CHECK-NEXT: Index     0123456789
+# CHECK-NEXT: Index     012345678
 
-# CHECK:      [0,0]     DeeeeeER  . .   adoxq	(%rdi), %rcx
-# CHECK-NEXT: [1,0]     D=====eeeeeER   adoxq	(%rdi), %rcx
+# CHECK:      [0,0]     DeeeeeER.   adoxq	(%rdi), %rcx
+# CHECK-NEXT: [1,0]     D=eeeeeER   adoxq	(%rdi), %rcx
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -133,4 +131,4 @@ adox (%rdi), %rcx
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     2     3.5    0.5    0.0       adoxq	(%rdi), %rcx
+# CHECK-NEXT: 0.     2     1.5    0.5    0.0       adoxq	(%rdi), %rcx

diff  --git a/llvm/test/tools/llvm-mca/X86/Znver2/mulx-read-advance.s b/llvm/test/tools/llvm-mca/X86/Znver2/mulx-read-advance.s
index afa7044a3dc6..7dc5a954c826 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver2/mulx-read-advance.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver2/mulx-read-advance.s
@@ -15,12 +15,12 @@ mulxq (%rdi), %rax, %rdx
 
 # CHECK:      Iterations:        2
 # CHECK-NEXT: Instructions:      2
-# CHECK-NEXT: Total Cycles:      17
+# CHECK-NEXT: Total Cycles:      13
 # CHECK-NEXT: Total uOps:        2
 
 # CHECK:      Dispatch Width:    4
-# CHECK-NEXT: uOps Per Cycle:    0.12
-# CHECK-NEXT: IPC:               0.12
+# CHECK-NEXT: uOps Per Cycle:    0.15
+# CHECK-NEXT: IPC:               0.15
 # CHECK-NEXT: Block RThroughput: 2.0
 
 # CHECK:      Instruction Info:
@@ -51,18 +51,18 @@ mulxq (%rdi), %rax, %rdx
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]
-# CHECK-NEXT:  -     0.50   0.50    -     2.00    -      -      -      -      -      -      -     2.00
+# CHECK-NEXT:  -     0.50   0.50    -     1.00    -      -      -      -      -      -      -     2.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   Instructions:
-# CHECK-NEXT:  -     0.50   0.50    -     2.00    -      -      -      -      -      -      -     2.00   mulxl	(%rdi), %eax, %edx
+# CHECK-NEXT:  -     0.50   0.50    -     1.00    -      -      -      -      -      -      -     2.00   mulxl	(%rdi), %eax, %edx
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     0123456
+# CHECK-NEXT:                     012
 # CHECK-NEXT: Index     0123456789
 
-# CHECK:      [0,0]     DeeeeeeeER.    ..   mulxl	(%rdi), %eax, %edx
-# CHECK-NEXT: [1,0]     D=======eeeeeeeER   mulxl	(%rdi), %eax, %edx
+# CHECK:      [0,0]     DeeeeeeeER. .   mulxl	(%rdi), %eax, %edx
+# CHECK-NEXT: [1,0]     D===eeeeeeeER   mulxl	(%rdi), %eax, %edx
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -71,19 +71,19 @@ mulxq (%rdi), %rax, %rdx
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     2     4.5    0.5    0.0       mulxl	(%rdi), %eax, %edx
+# CHECK-NEXT: 0.     2     2.5    0.5    0.0       mulxl	(%rdi), %eax, %edx
 
 # CHECK:      [1] Code Region
 
 # CHECK:      Iterations:        2
 # CHECK-NEXT: Instructions:      2
-# CHECK-NEXT: Total Cycles:      17
+# CHECK-NEXT: Total Cycles:      13
 # CHECK-NEXT: Total uOps:        2
 
 # CHECK:      Dispatch Width:    4
-# CHECK-NEXT: uOps Per Cycle:    0.12
-# CHECK-NEXT: IPC:               0.12
-# CHECK-NEXT: Block RThroughput: 1.0
+# CHECK-NEXT: uOps Per Cycle:    0.15
+# CHECK-NEXT: IPC:               0.15
+# CHECK-NEXT: Block RThroughput: 2.0
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps
@@ -94,7 +94,7 @@ mulxq (%rdi), %rax, %rdx
 # CHECK-NEXT: [6]: HasSideEffects (U)
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
-# CHECK-NEXT:  1      7     1.00    *                   mulxq	(%rdi), %rax, %rdx
+# CHECK-NEXT:  1      7     2.00    *                   mulxq	(%rdi), %rax, %rdx
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - Zn2AGU0
@@ -113,18 +113,18 @@ mulxq (%rdi), %rax, %rdx
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]
-# CHECK-NEXT:  -     0.50   0.50    -     1.00    -      -      -      -      -      -      -     1.00
+# CHECK-NEXT:  -     0.50   0.50    -     1.00    -      -      -      -      -      -      -     2.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   Instructions:
-# CHECK-NEXT:  -     0.50   0.50    -     1.00    -      -      -      -      -      -      -     1.00   mulxq	(%rdi), %rax, %rdx
+# CHECK-NEXT:  -     0.50   0.50    -     1.00    -      -      -      -      -      -      -     2.00   mulxq	(%rdi), %rax, %rdx
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     0123456
+# CHECK-NEXT:                     012
 # CHECK-NEXT: Index     0123456789
 
-# CHECK:      [0,0]     DeeeeeeeER.    ..   mulxq	(%rdi), %rax, %rdx
-# CHECK-NEXT: [1,0]     D=======eeeeeeeER   mulxq	(%rdi), %rax, %rdx
+# CHECK:      [0,0]     DeeeeeeeER. .   mulxq	(%rdi), %rax, %rdx
+# CHECK-NEXT: [1,0]     D===eeeeeeeER   mulxq	(%rdi), %rax, %rdx
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -133,4 +133,4 @@ mulxq (%rdi), %rax, %rdx
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     2     4.5    0.5    0.0       mulxq	(%rdi), %rax, %rdx
+# CHECK-NEXT: 0.     2     2.5    0.5    0.0       mulxq	(%rdi), %rax, %rdx

diff  --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-bmi2.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-bmi2.s
index 9f3521672d81..1562e859a6b2 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-bmi2.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-bmi2.s
@@ -64,8 +64,8 @@ shrx        %rax, (%rbx), %rcx
 # CHECK-NEXT:  2      5     0.33    *                   bzhiq	%rax, (%rbx), %rcx
 # CHECK-NEXT:  1      3     2.00                        mulxl	%eax, %ebx, %ecx
 # CHECK-NEXT:  1      7     2.00    *                   mulxl	(%rax), %ebx, %ecx
-# CHECK-NEXT:  1      3     1.00                        mulxq	%rax, %rbx, %rcx
-# CHECK-NEXT:  1      7     1.00    *                   mulxq	(%rax), %rbx, %rcx
+# CHECK-NEXT:  1      3     2.00                        mulxq	%rax, %rbx, %rcx
+# CHECK-NEXT:  1      7     2.00    *                   mulxq	(%rax), %rbx, %rcx
 # CHECK-NEXT:  1      100   0.25                        pdepl	%eax, %ebx, %ecx
 # CHECK-NEXT:  1      100   0.25    *                   pdepl	(%rax), %ebx, %ecx
 # CHECK-NEXT:  1      100   0.25                        pdepq	%rax, %rbx, %rcx
@@ -108,7 +108,7 @@ shrx        %rax, (%rbx), %rcx
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]
-# CHECK-NEXT: 4.00   4.00   4.00   5.00   10.00  5.00   5.00    -      -      -      -      -     5.00
+# CHECK-NEXT: 4.00   4.00   4.00   5.00   9.00   5.00   5.00    -      -      -      -      -     8.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   Instructions:
@@ -117,9 +117,9 @@ shrx        %rax, (%rbx), %rcx
 # CHECK-NEXT:  -      -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -     bzhiq	%rax, %rbx, %rcx
 # CHECK-NEXT: 0.33   0.33   0.33   0.25   0.25   0.25   0.25    -      -      -      -      -      -     bzhiq	%rax, (%rbx), %rcx
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -      -      -      -      -     2.00   mulxl	%eax, %ebx, %ecx
-# CHECK-NEXT: 0.33   0.33   0.33    -     2.00    -      -      -      -      -      -      -     2.00   mulxl	(%rax), %ebx, %ecx
-# CHECK-NEXT:  -      -      -      -     1.00    -      -      -      -      -      -      -      -     mulxq	%rax, %rbx, %rcx
-# CHECK-NEXT: 0.33   0.33   0.33    -     1.00    -      -      -      -      -      -      -     1.00   mulxq	(%rax), %rbx, %rcx
+# CHECK-NEXT: 0.33   0.33   0.33    -     1.00    -      -      -      -      -      -      -     2.00   mulxl	(%rax), %ebx, %ecx
+# CHECK-NEXT:  -      -      -      -     1.00    -      -      -      -      -      -      -     2.00   mulxq	%rax, %rbx, %rcx
+# CHECK-NEXT: 0.33   0.33   0.33    -     1.00    -      -      -      -      -      -      -     2.00   mulxq	(%rax), %rbx, %rcx
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     pdepl	%eax, %ebx, %ecx
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     pdepl	(%rax), %ebx, %ecx
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     pdepq	%rax, %rbx, %rcx


        


More information about the llvm-commits mailing list