[llvm] [X86] Fix some values for Znver4 model (PR #161405)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 1 12:49:56 PDT 2025
https://github.com/NexusXe updated https://github.com/llvm/llvm-project/pull/161405
>From fe000855c6d416e2e6a1bca94216874d1f5082fa Mon Sep 17 00:00:00 2001
From: NexusXe <andastrike at gmail.com>
Date: Tue, 30 Sep 2025 11:46:24 -0500
Subject: [PATCH 01/20] fix documentation reference
---
llvm/lib/Target/X86/X86ScheduleZnver4.td | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index cc300548a50e6..384c7fc591490 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -15,7 +15,7 @@
//===----------------------------------------------------------------------===//
def Znver4Model : SchedMachineModel {
- // AMD SOG Zen4, 2.9.6 Dispatch
+ // AMD SOG Zen4, 2.9.8 Dispatch
// The processor may dispatch up to 6 macro ops per cycle
// into the execution engine.
let IssueWidth = 6;
>From 092492a2ae49c392c122b076eaa913cddc4ae0f5 Mon Sep 17 00:00:00 2001
From: NexusXe <andastrike at gmail.com>
Date: Tue, 30 Sep 2025 11:46:39 -0500
Subject: [PATCH 02/20] better HighLatency value
---
llvm/lib/Target/X86/X86ScheduleZnver4.td | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index 384c7fc591490..2a8e614d62512 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -46,8 +46,9 @@ def Znver4Model : SchedMachineModel {
int VecLoadLatency = 7;
// Latency of a simple store operation.
int StoreLatency = 1;
- // FIXME:
- let HighLatency = 25; // FIXME: any better choice?
+ // Mean and median value for all instructions with latencies >6
+ // Source: Zen4 Instruction Latencies spreadsheet (included with SOG)
+ let HighLatency = 13;
// AMD SOG Zen4, 2.8 Optimizing Branching
// The branch misprediction penalty is in the range from 11 to 18 cycles,
// <...>. The common case penalty is 13 cycles.
>From bc45c699cd87b6fcfc040d99001caf580dc593ef Mon Sep 17 00:00:00 2001
From: NexusXe <andastrike at gmail.com>
Date: Tue, 30 Sep 2025 11:47:08 -0500
Subject: [PATCH 03/20] LEA metrics
---
llvm/lib/Target/X86/X86ScheduleZnver4.td | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index 2a8e614d62512..55a4c4a1388b7 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -586,10 +586,11 @@ def : InstRW<[Zn4WriteADC8mr_SBB8mr], (instrs ADC8mr, SBB8mr)>;
defm : Zn4WriteResInt<WriteLEA, [Zn4AGU012], 1, [1], 1>; // LEA instructions can't fold loads.
// This write is used for slow LEA instructions.
+// values from uops.info
def Zn4Write3OpsLEA : SchedWriteRes<[Zn4ALU0123]> {
- let Latency = 2;
- let ReleaseAtCycles = [1];
- let NumMicroOps = 2;
+ let Latency = 3;
+ let ReleaseAtCycles = [1, 1, 1, 2];
+ let NumMicroOps = 4;
}
// On Znver4, a slow LEA is either a 3Ops LEA (base, index, offset),
@@ -613,9 +614,10 @@ def Zn4WriteLEA : SchedWriteVariant<[
def : InstRW<[Zn4WriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;
+// values from uops.info
def Zn4SlowLEA16r : SchedWriteRes<[Zn4ALU0123]> {
- let Latency = 2; // FIXME: not from llvm-exegesis
- let ReleaseAtCycles = [4];
+ let Latency = 2;
+ let ReleaseAtCycles = [1, 1];
let NumMicroOps = 2;
}
>From fd6f87f72413c97895846c9fa015dbbfa58a44c0 Mon Sep 17 00:00:00 2001
From: NexusXe <andastrike at gmail.com>
Date: Tue, 30 Sep 2025 11:47:33 -0500
Subject: [PATCH 04/20] (CMP)XCHG uops
---
llvm/lib/Target/X86/X86ScheduleZnver4.td | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index 55a4c4a1388b7..8fec28cb34118 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -663,14 +663,14 @@ def : InstRW<[Zn4WriteCMPXCHG8rm_LCMPXCHG8], (instrs CMPXCHG8rm, LCMPXCHG8)>;
def Zn4WriteCMPXCHG8B : SchedWriteRes<[Zn4ALU0123]> {
let Latency = 3; // FIXME: not from llvm-exegesis
let ReleaseAtCycles = [24];
- let NumMicroOps = 19;
+ let NumMicroOps = 15;
}
def : InstRW<[Zn4WriteCMPXCHG8B], (instrs CMPXCHG8B)>;
def Zn4WriteCMPXCHG16B_LCMPXCHG16B : SchedWriteRes<[Zn4ALU0123]> {
let Latency = 4; // FIXME: not from llvm-exegesis
let ReleaseAtCycles = [59];
- let NumMicroOps = 28;
+ let NumMicroOps = 26;
}
def : InstRW<[Zn4WriteCMPXCHG16B_LCMPXCHG16B], (instrs CMPXCHG16B, LCMPXCHG16B)>;
@@ -684,7 +684,7 @@ def : InstRW<[Zn4WriteWriteXCHGUnrenameable], (instrs XCHG8rr, XCHG16rr, XCHG16a
def Zn4WriteXCHG8rm_XCHG16rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU0123]> {
let Latency = !add(Znver4Model.LoadLatency, 3); // FIXME: not from llvm-exegesis
let ReleaseAtCycles = [1, 1, 2];
- let NumMicroOps = 5;
+ let NumMicroOps = 2;
}
def : InstRW<[Zn4WriteXCHG8rm_XCHG16rm], (instrs XCHG8rm, XCHG16rm)>;
>From a67c2522f3d25c572d122687dbc476dad80a0193 Mon Sep 17 00:00:00 2001
From: NexusXe <andastrike at gmail.com>
Date: Tue, 30 Sep 2025 11:47:53 -0500
Subject: [PATCH 05/20] (I)DIV values from docs/uops.info
---
llvm/lib/Target/X86/X86ScheduleZnver4.td | 18 ++++++++----------
1 file changed, 8 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index 8fec28cb34118..3352a9beb516a 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -696,16 +696,14 @@ def Zn4WriteXCHG32rm_XCHG64rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU0123]>
def : InstRW<[Zn4WriteXCHG32rm_XCHG64rm], (instrs XCHG32rm, XCHG64rm)>;
// Integer division.
-// FIXME: uops for 8-bit division measures as 2. for others it's a guess.
-// FIXME: latency for 8-bit division measures as 10. for others it's a guess.
-defm : Zn4WriteResIntPair<WriteDiv8, [Zn4Divider], 10, [10], 2>;
-defm : Zn4WriteResIntPair<WriteDiv16, [Zn4Divider], 11, [11], 2>;
-defm : Zn4WriteResIntPair<WriteDiv32, [Zn4Divider], 13, [13], 2>;
-defm : Zn4WriteResIntPair<WriteDiv64, [Zn4Divider], 17, [17], 2>;
-defm : Zn4WriteResIntPair<WriteIDiv8, [Zn4Divider], 10, [10], 2>;
-defm : Zn4WriteResIntPair<WriteIDiv16, [Zn4Divider], 11, [11], 2>;
-defm : Zn4WriteResIntPair<WriteIDiv32, [Zn4Divider], 13, [13], 2>;
-defm : Zn4WriteResIntPair<WriteIDiv64, [Zn4Divider], 17, [17], 2>;
+defm : Zn4WriteResIntPair<WriteDiv8, [Zn4Divider], 9, [9], 2>;
+defm : Zn4WriteResIntPair<WriteDiv16, [Zn4Divider], 10, [10], 2>;
+defm : Zn4WriteResIntPair<WriteDiv32, [Zn4Divider], 12, [12], 2>;
+defm : Zn4WriteResIntPair<WriteDiv64, [Zn4Divider], 18, [18], 2>;
+defm : Zn4WriteResIntPair<WriteIDiv8, [Zn4Divider], 9, [9], 2>;
+defm : Zn4WriteResIntPair<WriteIDiv16, [Zn4Divider], 10, [10], 2>;
+defm : Zn4WriteResIntPair<WriteIDiv32, [Zn4Divider], 12, [12], 2>;
+defm : Zn4WriteResIntPair<WriteIDiv64, [Zn4Divider], 18, [18], 2>;
defm : Zn4WriteResIntPair<WriteBSF, [Zn4ALU1], 1, [1], 6, /*LoadUOps=*/1>; // Bit scan forward.
defm : Zn4WriteResIntPair<WriteBSR, [Zn4ALU1], 1, [1], 6, /*LoadUOps=*/1>; // Bit scan reverse.
>From cc5fc0dc2e153e42db16a858d3b7775db38df674 Mon Sep 17 00:00:00 2001
From: NexusXe <andastrike at gmail.com>
Date: Tue, 30 Sep 2025 11:48:13 -0500
Subject: [PATCH 06/20] use zen4 BSF/BSR uops count
---
llvm/lib/Target/X86/X86ScheduleZnver4.td | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index 3352a9beb516a..9aa31b31a992c 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -705,8 +705,8 @@ defm : Zn4WriteResIntPair<WriteIDiv16, [Zn4Divider], 10, [10], 2>;
defm : Zn4WriteResIntPair<WriteIDiv32, [Zn4Divider], 12, [12], 2>;
defm : Zn4WriteResIntPair<WriteIDiv64, [Zn4Divider], 18, [18], 2>;
-defm : Zn4WriteResIntPair<WriteBSF, [Zn4ALU1], 1, [1], 6, /*LoadUOps=*/1>; // Bit scan forward.
-defm : Zn4WriteResIntPair<WriteBSR, [Zn4ALU1], 1, [1], 6, /*LoadUOps=*/1>; // Bit scan reverse.
+defm : Zn4WriteResIntPair<WriteBSF, [Zn4ALU1], 1, [1], 1, /*LoadUOps=*/1>; // Bit scan forward.
+defm : Zn4WriteResIntPair<WriteBSR, [Zn4ALU1], 1, [1], 1, /*LoadUOps=*/1>; // Bit scan reverse.
defm : Zn4WriteResIntPair<WritePOPCNT, [Zn4ALU0123], 1, [1], 1>; // Bit population count.
>From 3555807bcc1b8966968f1412341bfbee0f7322b9 Mon Sep 17 00:00:00 2001
From: NexusXe <andastrike at gmail.com>
Date: Tue, 30 Sep 2025 11:49:10 -0500
Subject: [PATCH 07/20] TZCNT in 1 uop
---
llvm/lib/Target/X86/X86ScheduleZnver4.td | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index 9aa31b31a992c..fc454eb5ddee0 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -726,12 +726,12 @@ def Zn4WriteLZCNT16rr : SchedWriteRes<[Zn4ALU0123]> {
}
def : InstRW<[Zn4WriteLZCNT16rr], (instrs LZCNT16rr)>;
-defm : Zn4WriteResIntPair<WriteTZCNT, [Zn4ALU12], 2, [1], 2>; // Trailing zero count.
+defm : Zn4WriteResIntPair<WriteTZCNT, [Zn4ALU12], 1, [1], 1>; // Trailing zero count.
def Zn4WriteTZCNT16rr : SchedWriteRes<[Zn4ALU0123]> {
- let Latency = 2;
- let ReleaseAtCycles = [4];
- let NumMicroOps = 2;
+ let Latency = 1;
+ let ReleaseAtCycles = [1];
+ let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteTZCNT16rr], (instrs TZCNT16rr)>;
>From 48d84d299357444d890d27795d81f1540124b42e Mon Sep 17 00:00:00 2001
From: NexusXe <andastrike at gmail.com>
Date: Tue, 30 Sep 2025 11:49:52 -0500
Subject: [PATCH 08/20] some higher latency string instructions
---
llvm/lib/Target/X86/X86ScheduleZnver4.td | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index fc454eb5ddee0..86793f4de1dd0 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -1327,9 +1327,9 @@ def : InstRW<[Zn4WriteSHA256RNDS2rr], (instrs SHA256RNDS2rr)>;
// Strings instructions.
// Packed Compare Implicit Length Strings, Return Mask
-defm : Zn4WriteResXMMPair<WritePCmpIStrM, [Zn4FPVAdd0123], 6, [8], 3, /*LoadUOps=*/1>;
+defm : Zn4WriteResXMMPair<WritePCmpIStrM, [Zn4FPVAdd0123], 7, [8], 3, /*LoadUOps=*/1>;
// Packed Compare Explicit Length Strings, Return Mask
-defm : Zn4WriteResXMMPair<WritePCmpEStrM, [Zn4FPVAdd0123], 6, [12], 7, /*LoadUOps=*/5>;
+defm : Zn4WriteResXMMPair<WritePCmpEStrM, [Zn4FPVAdd0123], 7, [12], 7, /*LoadUOps=*/5>;
// Packed Compare Implicit Length Strings, Return Index
defm : Zn4WriteResXMMPair<WritePCmpIStrI, [Zn4FPVAdd0123], 2, [8], 4>;
// Packed Compare Explicit Length Strings, Return Index
>From 8d345ddfec6ae30a9bd006503c8fbc0ba93ee54e Mon Sep 17 00:00:00 2001
From: NexusXe <andastrike at gmail.com>
Date: Tue, 30 Sep 2025 11:55:18 -0500
Subject: [PATCH 09/20] use Zen4 CLMUL/VPERM(S/D) values
---
llvm/lib/Target/X86/X86ScheduleZnver4.td | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index 86793f4de1dd0..405c0a2e9fbd1 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -1341,7 +1341,7 @@ defm : Zn4WriteResXMMPair<WriteAESIMC, [Zn4FPAES01], 4, [1], 1>; // InvMixColumn
defm : Zn4WriteResXMMPair<WriteAESKeyGen, [Zn4FPAES01], 4, [1], 1>; // Key Generation.
// Carry-less multiplication instructions.
-defm : Zn4WriteResXMMPair<WriteCLMul, [Zn4FPCLM01], 4, [4], 4>;
+defm : Zn4WriteResXMMPair<WriteCLMul, [Zn4FPCLM01], 4, [3], 4>;
// EMMS/FEMMS
defm : Zn4WriteResInt<WriteEMMS, [Zn4ALU0123], 2, [1], 1>; // FIXME: latency not from llvm-exegesis
@@ -1387,23 +1387,23 @@ def Zn4WriteVPERM2F128rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
def : InstRW<[Zn4WriteVPERM2F128rm], (instrs VPERM2F128rmi)>;
def Zn4WriteVPERMPSYrr : SchedWriteRes<[Zn4FPVShuf]> {
- let Latency = 7;
+ let Latency = 4;
let ReleaseAtCycles = [1];
- let NumMicroOps = 2;
+ let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVPERMPSYrr], (instrs VPERMPSYrr)>;
def Zn4WriteVPERMPSYrm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteVPERMPSYrr.Latency);
- let ReleaseAtCycles = [1, 1, 2];
- let NumMicroOps = !add(Zn4WriteVPERMPSYrr.NumMicroOps, 1);
+ let ReleaseAtCycles = [1];
+ let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVPERMPSYrm], (instrs VPERMPSYrm)>;
def Zn4WriteVPERMYri : SchedWriteRes<[Zn4FPVShuf]> {
- let Latency = 6;
+ let Latency = 4;
let ReleaseAtCycles = [1];
- let NumMicroOps = 2;
+ let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVPERMYri], (instrs VPERMPDYri, VPERMQYri)>;
@@ -1415,9 +1415,9 @@ def Zn4WriteVPERMPDYmi : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
def : InstRW<[Zn4WriteVPERMPDYmi], (instrs VPERMPDYmi)>;
def Zn4WriteVPERMDYrr : SchedWriteRes<[Zn4FPVShuf]> {
- let Latency = 5;
+ let Latency = 4;
let ReleaseAtCycles = [1];
- let NumMicroOps = 2;
+ let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVPERMDYrr], (instrs VPERMDYrr)>;
>From d1bf965cfd2ab0ac7fb4c678a5d538c08abf0185 Mon Sep 17 00:00:00 2001
From: NexusXe <andastrike at gmail.com>
Date: Tue, 30 Sep 2025 12:03:31 -0500
Subject: [PATCH 10/20] replace FIXME for latency
---
llvm/lib/Target/X86/X86ScheduleZnver4.td | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index 405c0a2e9fbd1..421c6eb6bf0b9 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -616,7 +616,7 @@ def : InstRW<[Zn4WriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;
// values from uops.info
def Zn4SlowLEA16r : SchedWriteRes<[Zn4ALU0123]> {
- let Latency = 2;
+ let Latency = 2; // FIXME: not from llvm-exegesis
let ReleaseAtCycles = [1, 1];
let NumMicroOps = 2;
}
>From de77a33cf2f78c6d1f8e18a30e120fa56f04c076 Mon Sep 17 00:00:00 2001
From: NexusXe <andastrike at gmail.com>
Date: Tue, 30 Sep 2025 14:22:16 -0500
Subject: [PATCH 11/20] revert multiop ReleaseAtCycles value
---
llvm/lib/Target/X86/X86ScheduleZnver4.td | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index 421c6eb6bf0b9..9d6b57d51808a 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -617,7 +617,7 @@ def : InstRW<[Zn4WriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;
// values from uops.info
def Zn4SlowLEA16r : SchedWriteRes<[Zn4ALU0123]> {
let Latency = 2; // FIXME: not from llvm-exegesis
- let ReleaseAtCycles = [1, 1];
+ let ReleaseAtCycles = [4];
let NumMicroOps = 2;
}
>From de0daad87f44107bd37d6bfca0dc07c66da61f71 Mon Sep 17 00:00:00 2001
From: NexusXe <andastrike at gmail.com>
Date: Tue, 30 Sep 2025 14:40:19 -0500
Subject: [PATCH 12/20] fix RAC for Zn4WriteVPERMPSYrm
---
llvm/lib/Target/X86/X86ScheduleZnver4.td | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index 9d6b57d51808a..1346ac8bfad91 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -1395,7 +1395,7 @@ def : InstRW<[Zn4WriteVPERMPSYrr], (instrs VPERMPSYrr)>;
def Zn4WriteVPERMPSYrm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteVPERMPSYrr.Latency);
- let ReleaseAtCycles = [1];
+ let ReleaseAtCycles = [1, 1, 1];
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVPERMPSYrm], (instrs VPERMPSYrm)>;
>From a6621ca591882056694cfcf98e981d351f1a181a Mon Sep 17 00:00:00 2001
From: NexusXe <andastrike at gmail.com>
Date: Tue, 30 Sep 2025 14:41:04 -0500
Subject: [PATCH 13/20] fix RAC for Zn4Write3OpsLEA
---
llvm/lib/Target/X86/X86ScheduleZnver4.td | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index 1346ac8bfad91..9e3939b60408f 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -589,7 +589,7 @@ defm : Zn4WriteResInt<WriteLEA, [Zn4AGU012], 1, [1], 1>; // LEA instructions
// values from uops.info
def Zn4Write3OpsLEA : SchedWriteRes<[Zn4ALU0123]> {
let Latency = 3;
- let ReleaseAtCycles = [1, 1, 1, 2];
+ let ReleaseAtCycles = [1];
let NumMicroOps = 4;
}
>From 603d9b8998310cb051183e4599d34f4e18b5ff7a Mon Sep 17 00:00:00 2001
From: NexusXe <andastrike at gmail.com>
Date: Wed, 1 Oct 2025 12:13:48 -0500
Subject: [PATCH 14/20] regenerate resource files
---
.../llvm-mca/X86/Znver4/resources-avx1.s | 18 ++--
.../llvm-mca/X86/Znver4/resources-avx2.s | 20 ++--
.../X86/Znver4/resources-avx512vpclmulqdq.s | 10 +-
.../X86/Znver4/resources-avx512vpclmulqdqvl.s | 18 ++--
.../llvm-mca/X86/Znver4/resources-bmi1.s | 16 +--
.../llvm-mca/X86/Znver4/resources-cmpxchg.s | 8 +-
.../tools/llvm-mca/X86/Znver4/resources-lea.s | 80 +++++++--------
.../llvm-mca/X86/Znver4/resources-pclmul.s | 10 +-
.../llvm-mca/X86/Znver4/resources-sse42.s | 8 +-
.../X86/Znver4/resources-vpclmulqdq.s | 10 +-
.../llvm-mca/X86/Znver4/resources-x86_64.s | 98 +++++++++----------
11 files changed, 148 insertions(+), 148 deletions(-)
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s
index 1ffe53366fdb0..d1df30497325b 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s
@@ -1403,8 +1403,8 @@ vzeroupper
# CHECK-NEXT: 1 8 0.50 * vpblendvb %xmm3, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.25 vpblendw $11, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vpblendw $11, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 4 4 2.00 vpclmulqdq $11, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 4 11 2.00 * vpclmulqdq $11, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 4 4 1.50 vpclmulqdq $11, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 4 11 1.50 * vpclmulqdq $11, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.25 vpcmpeqb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vpcmpeqb (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.25 vpcmpeqd %xmm0, %xmm1, %xmm2
@@ -1415,8 +1415,8 @@ vzeroupper
# CHECK-NEXT: 1 8 0.50 * vpcmpeqw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 8 6 3.00 vpcmpestri $1, %xmm0, %xmm2
# CHECK-NEXT: 12 13 3.00 * vpcmpestri $1, (%rax), %xmm2
-# CHECK-NEXT: 7 6 3.00 vpcmpestrm $1, %xmm0, %xmm2
-# CHECK-NEXT: 12 13 3.00 * vpcmpestrm $1, (%rax), %xmm2
+# CHECK-NEXT: 7 7 3.00 vpcmpestrm $1, %xmm0, %xmm2
+# CHECK-NEXT: 12 14 3.00 * vpcmpestrm $1, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.25 vpcmpgtb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vpcmpgtb (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.25 vpcmpgtd %xmm0, %xmm1, %xmm2
@@ -1427,8 +1427,8 @@ vzeroupper
# CHECK-NEXT: 1 8 0.50 * vpcmpgtw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 4 2 2.00 vpcmpistri $1, %xmm0, %xmm2
# CHECK-NEXT: 4 9 2.00 * vpcmpistri $1, (%rax), %xmm2
-# CHECK-NEXT: 3 6 2.00 vpcmpistrm $1, %xmm0, %xmm2
-# CHECK-NEXT: 4 13 2.00 * vpcmpistrm $1, (%rax), %xmm2
+# CHECK-NEXT: 3 7 2.00 vpcmpistrm $1, %xmm0, %xmm2
+# CHECK-NEXT: 4 14 2.00 * vpcmpistrm $1, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vperm2f128 $1, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 10 1.00 * vperm2f128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vpermilpd $1, %xmm0, %xmm2
@@ -1749,7 +1749,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: 1.33 1.33 1.33 16.50 16.50 16.50 16.50 - 205.25 393.58 268.08 158.08 208.50 208.50 65.00 119.67 119.67 119.67 107.00 107.00 107.00 19.00 19.00
+# CHECK-NEXT: 1.33 1.33 1.33 16.50 16.50 16.50 16.50 - 204.25 392.58 268.08 158.08 208.50 208.50 65.00 119.67 119.67 119.67 107.00 107.00 107.00 19.00 19.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
@@ -2126,8 +2126,8 @@ vzeroupper
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpblendvb %xmm3, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpblendw $11, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpblendw $11, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpclmulqdq $11, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpclmulqdq $11, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - - - - - - - - - - vpclmulqdq $11, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpclmulqdq $11, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpcmpeqb %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpeqb (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpcmpeqd %xmm0, %xmm1, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s
index 6dc5bacde9059..2851632869865 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s
@@ -560,14 +560,14 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 8 0.50 * vpcmpgtw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vperm2i128 $1, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 1.00 * vperm2i128 $1, (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 2 5 1.00 vpermd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 12 2.00 * vpermd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 2 6 1.00 vpermpd $1, %ymm0, %ymm2
-# CHECK-NEXT: 3 13 2.00 * vpermpd $1, (%rax), %ymm2
-# CHECK-NEXT: 2 7 1.00 vpermps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 3 14 2.00 * vpermps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 2 6 1.00 vpermq $1, %ymm0, %ymm2
-# CHECK-NEXT: 2 12 2.00 * vpermq $1, (%rax), %ymm2
+# CHECK-NEXT: 1 4 1.00 vpermd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 11 2.00 * vpermd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 4 1.00 vpermpd $1, %ymm0, %ymm2
+# CHECK-NEXT: 2 11 2.00 * vpermpd $1, (%rax), %ymm2
+# CHECK-NEXT: 1 4 1.00 vpermps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 11 1.00 * vpermps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 4 1.00 vpermq $1, %ymm0, %ymm2
+# CHECK-NEXT: 1 11 2.00 * vpermq $1, (%rax), %ymm2
# CHECK-NEXT: 1 5 0.33 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 1 5 0.33 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 1 5 0.33 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
@@ -789,7 +789,7 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: 6.67 6.67 6.67 - - - - - 93.75 132.75 92.25 36.25 80.50 80.50 29.00 52.33 52.33 52.33 50.67 50.67 50.67 2.50 2.50
+# CHECK-NEXT: 6.67 6.67 6.67 - - - - - 93.75 131.75 92.25 36.25 80.50 80.50 29.00 52.33 52.33 52.33 50.67 50.67 50.67 2.50 2.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
@@ -898,7 +898,7 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpermpd $1, %ymm0, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 2.00 - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermpd $1, (%rax), %ymm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpermps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 2.00 - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 1.00 - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpermq $1, %ymm0, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 2.00 - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermq $1, (%rax), %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vpclmulqdq.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vpclmulqdq.s
index 87ba0607e71d1..d1f2a980ee444 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vpclmulqdq.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vpclmulqdq.s
@@ -13,8 +13,8 @@ vpclmulqdq $11, (%rax), %zmm17, %zmm19
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 4 4 2.00 vpclmulqdq $11, %zmm16, %zmm17, %zmm19
-# CHECK-NEXT: 4 11 2.00 * vpclmulqdq $11, (%rax), %zmm17, %zmm19
+# CHECK-NEXT: 4 4 1.50 vpclmulqdq $11, %zmm16, %zmm17, %zmm19
+# CHECK-NEXT: 4 11 1.50 * vpclmulqdq $11, (%rax), %zmm17, %zmm19
# CHECK: Resources:
# CHECK-NEXT: [0] - Zn4AGU0
@@ -43,9 +43,9 @@ vpclmulqdq $11, (%rax), %zmm17, %zmm19
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - -
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpclmulqdq $11, %zmm16, %zmm17, %zmm19
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpclmulqdq $11, (%rax), %zmm17, %zmm19
+# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - - - - - - - - - - vpclmulqdq $11, %zmm16, %zmm17, %zmm19
+# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpclmulqdq $11, (%rax), %zmm17, %zmm19
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vpclmulqdqvl.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vpclmulqdqvl.s
index 3c80c567227c5..ea7a28027a782 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vpclmulqdqvl.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vpclmulqdqvl.s
@@ -16,10 +16,10 @@ vpclmulqdq $11, (%rax), %ymm17, %ymm19
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 4 4 2.00 vpclmulqdq $11, %xmm16, %xmm17, %xmm19
-# CHECK-NEXT: 4 11 2.00 * vpclmulqdq $11, (%rax), %xmm17, %xmm19
-# CHECK-NEXT: 4 4 2.00 vpclmulqdq $11, %ymm16, %ymm17, %ymm19
-# CHECK-NEXT: 4 11 2.00 * vpclmulqdq $11, (%rax), %ymm17, %ymm19
+# CHECK-NEXT: 4 4 1.50 vpclmulqdq $11, %xmm16, %xmm17, %xmm19
+# CHECK-NEXT: 4 11 1.50 * vpclmulqdq $11, (%rax), %xmm17, %xmm19
+# CHECK-NEXT: 4 4 1.50 vpclmulqdq $11, %ymm16, %ymm17, %ymm19
+# CHECK-NEXT: 4 11 1.50 * vpclmulqdq $11, (%rax), %ymm17, %ymm19
# CHECK: Resources:
# CHECK-NEXT: [0] - Zn4AGU0
@@ -48,11 +48,11 @@ vpclmulqdq $11, (%rax), %ymm17, %ymm19
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: - - - - - - - - 8.00 8.00 - - 1.00 1.00 - 0.67 0.67 0.67 0.67 0.67 0.67 - -
+# CHECK-NEXT: - - - - - - - - 6.00 6.00 - - 1.00 1.00 - 0.67 0.67 0.67 0.67 0.67 0.67 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpclmulqdq $11, %xmm16, %xmm17, %xmm19
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpclmulqdq $11, (%rax), %xmm17, %xmm19
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpclmulqdq $11, %ymm16, %ymm17, %ymm19
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpclmulqdq $11, (%rax), %ymm17, %ymm19
+# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - - - - - - - - - - vpclmulqdq $11, %xmm16, %xmm17, %xmm19
+# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpclmulqdq $11, (%rax), %xmm17, %xmm19
+# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - - - - - - - - - - vpclmulqdq $11, %ymm16, %ymm17, %ymm19
+# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpclmulqdq $11, (%rax), %ymm17, %ymm19
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-bmi1.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-bmi1.s
index f4888cf81523f..afbd566751c95 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-bmi1.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-bmi1.s
@@ -69,12 +69,12 @@ tzcnt (%rax), %rcx
# CHECK-NEXT: 2 5 0.33 * blsrl (%rax), %ecx
# CHECK-NEXT: 1 1 0.25 blsrq %rax, %rcx
# CHECK-NEXT: 2 5 0.33 * blsrq (%rax), %rcx
-# CHECK-NEXT: 2 2 1.00 tzcntw %ax, %cx
-# CHECK-NEXT: 2 6 0.50 * tzcntw (%rax), %cx
-# CHECK-NEXT: 2 2 0.50 tzcntl %eax, %ecx
-# CHECK-NEXT: 2 6 0.50 * tzcntl (%rax), %ecx
-# CHECK-NEXT: 2 2 0.50 tzcntq %rax, %rcx
-# CHECK-NEXT: 2 6 0.50 * tzcntq (%rax), %rcx
+# CHECK-NEXT: 1 1 0.25 tzcntw %ax, %cx
+# CHECK-NEXT: 1 5 0.50 * tzcntw (%rax), %cx
+# CHECK-NEXT: 1 1 0.50 tzcntl %eax, %ecx
+# CHECK-NEXT: 1 5 0.50 * tzcntl (%rax), %ecx
+# CHECK-NEXT: 1 1 0.50 tzcntq %rax, %rcx
+# CHECK-NEXT: 1 5 0.50 * tzcntq (%rax), %rcx
# CHECK: Resources:
# CHECK-NEXT: [0] - Zn4AGU0
@@ -103,7 +103,7 @@ tzcnt (%rax), %rcx
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: 4.33 4.33 4.33 5.00 9.50 9.50 5.00 - - - - - - - - 4.33 4.33 4.33 4.33 4.33 4.33 - -
+# CHECK-NEXT: 4.33 4.33 4.33 4.25 8.75 8.75 4.25 - - - - - - - - 4.33 4.33 4.33 4.33 4.33 4.33 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
@@ -127,7 +127,7 @@ tzcnt (%rax), %rcx
# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - blsrl (%rax), %ecx
# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - blsrq %rax, %rcx
# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - blsrq (%rax), %rcx
-# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - - - - - - - - - - tzcntw %ax, %cx
+# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - tzcntw %ax, %cx
# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - tzcntw (%rax), %cx
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - tzcntl %eax, %ecx
# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - tzcntl (%rax), %ecx
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-cmpxchg.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-cmpxchg.s
index 64feeaf6d4ad8..903cca3b913b5 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-cmpxchg.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-cmpxchg.s
@@ -15,10 +15,10 @@ lock cmpxchg16b (%rax)
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 19 3 6.00 * * cmpxchg8b (%rax)
-# CHECK-NEXT: 28 4 14.75 * * cmpxchg16b (%rax)
-# CHECK-NEXT: 19 3 6.00 * * lock cmpxchg8b (%rax)
-# CHECK-NEXT: 28 4 14.75 * * lock cmpxchg16b (%rax)
+# CHECK-NEXT: 15 3 6.00 * * cmpxchg8b (%rax)
+# CHECK-NEXT: 26 4 14.75 * * cmpxchg16b (%rax)
+# CHECK-NEXT: 15 3 6.00 * * lock cmpxchg8b (%rax)
+# CHECK-NEXT: 26 4 14.75 * * lock cmpxchg16b (%rax)
# CHECK: Resources:
# CHECK-NEXT: [0] - Zn4AGU0
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-lea.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-lea.s
index d259949af3846..809b1bed70f08 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-lea.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-lea.s
@@ -170,11 +170,11 @@ lea 1024(%rax, %rbx, 2), %rcx
# CHECK-NEXT: 1 1 0.33 leal (,%rbx), %ecx
# CHECK-NEXT: 1 1 0.33 leaq (,%rbx), %rcx
# CHECK-NEXT: 2 2 1.00 leaw (,%ebx,2), %cx
-# CHECK-NEXT: 2 2 0.25 leal (,%ebx,2), %ecx
-# CHECK-NEXT: 2 2 0.25 leaq (,%ebx,2), %rcx
+# CHECK-NEXT: 4 3 0.25 leal (,%ebx,2), %ecx
+# CHECK-NEXT: 4 3 0.25 leaq (,%ebx,2), %rcx
# CHECK-NEXT: 2 2 1.00 leaw (,%rbx,2), %cx
-# CHECK-NEXT: 2 2 0.25 leal (,%rbx,2), %ecx
-# CHECK-NEXT: 2 2 0.25 leaq (,%rbx,2), %rcx
+# CHECK-NEXT: 4 3 0.25 leal (,%rbx,2), %ecx
+# CHECK-NEXT: 4 3 0.25 leaq (,%rbx,2), %rcx
# CHECK-NEXT: 2 2 1.00 leaw (%eax,%ebx), %cx
# CHECK-NEXT: 1 1 0.33 leal (%eax,%ebx), %ecx
# CHECK-NEXT: 1 1 0.33 leaq (%eax,%ebx), %rcx
@@ -188,11 +188,11 @@ lea 1024(%rax, %rbx, 2), %rcx
# CHECK-NEXT: 1 1 0.33 leal (%rax,%rbx), %ecx
# CHECK-NEXT: 1 1 0.33 leaq (%rax,%rbx), %rcx
# CHECK-NEXT: 2 2 1.00 leaw (%eax,%ebx,2), %cx
-# CHECK-NEXT: 2 2 0.25 leal (%eax,%ebx,2), %ecx
-# CHECK-NEXT: 2 2 0.25 leaq (%eax,%ebx,2), %rcx
+# CHECK-NEXT: 4 3 0.25 leal (%eax,%ebx,2), %ecx
+# CHECK-NEXT: 4 3 0.25 leaq (%eax,%ebx,2), %rcx
# CHECK-NEXT: 2 2 1.00 leaw (%rax,%rbx,2), %cx
-# CHECK-NEXT: 2 2 0.25 leal (%rax,%rbx,2), %ecx
-# CHECK-NEXT: 2 2 0.25 leaq (%rax,%rbx,2), %rcx
+# CHECK-NEXT: 4 3 0.25 leal (%rax,%rbx,2), %ecx
+# CHECK-NEXT: 4 3 0.25 leaq (%rax,%rbx,2), %rcx
# CHECK-NEXT: 2 2 1.00 leaw -16, %cx
# CHECK-NEXT: 1 1 0.33 leal -16, %ecx
# CHECK-NEXT: 1 1 0.33 leaq -16, %rcx
@@ -215,29 +215,29 @@ lea 1024(%rax, %rbx, 2), %rcx
# CHECK-NEXT: 1 1 0.33 leal -16(,%rbx), %ecx
# CHECK-NEXT: 1 1 0.33 leaq -16(,%rbx), %rcx
# CHECK-NEXT: 2 2 1.00 leaw -16(,%ebx,2), %cx
-# CHECK-NEXT: 2 2 0.25 leal -16(,%ebx,2), %ecx
-# CHECK-NEXT: 2 2 0.25 leaq -16(,%ebx,2), %rcx
+# CHECK-NEXT: 4 3 0.25 leal -16(,%ebx,2), %ecx
+# CHECK-NEXT: 4 3 0.25 leaq -16(,%ebx,2), %rcx
# CHECK-NEXT: 2 2 1.00 leaw -16(,%rbx,2), %cx
-# CHECK-NEXT: 2 2 0.25 leal -16(,%rbx,2), %ecx
-# CHECK-NEXT: 2 2 0.25 leaq -16(,%rbx,2), %rcx
+# CHECK-NEXT: 4 3 0.25 leal -16(,%rbx,2), %ecx
+# CHECK-NEXT: 4 3 0.25 leaq -16(,%rbx,2), %rcx
# CHECK-NEXT: 2 2 1.00 leaw -16(%eax,%ebx), %cx
-# CHECK-NEXT: 2 2 0.25 leal -16(%eax,%ebx), %ecx
-# CHECK-NEXT: 2 2 0.25 leaq -16(%eax,%ebx), %rcx
+# CHECK-NEXT: 4 3 0.25 leal -16(%eax,%ebx), %ecx
+# CHECK-NEXT: 4 3 0.25 leaq -16(%eax,%ebx), %rcx
# CHECK-NEXT: 2 2 1.00 leaw -16(%rax,%rbx), %cx
-# CHECK-NEXT: 2 2 0.25 leal -16(%rax,%rbx), %ecx
-# CHECK-NEXT: 2 2 0.25 leaq -16(%rax,%rbx), %rcx
+# CHECK-NEXT: 4 3 0.25 leal -16(%rax,%rbx), %ecx
+# CHECK-NEXT: 4 3 0.25 leaq -16(%rax,%rbx), %rcx
# CHECK-NEXT: 2 2 1.00 leaw -16(%eax,%ebx), %cx
-# CHECK-NEXT: 2 2 0.25 leal -16(%eax,%ebx), %ecx
-# CHECK-NEXT: 2 2 0.25 leaq -16(%eax,%ebx), %rcx
+# CHECK-NEXT: 4 3 0.25 leal -16(%eax,%ebx), %ecx
+# CHECK-NEXT: 4 3 0.25 leaq -16(%eax,%ebx), %rcx
# CHECK-NEXT: 2 2 1.00 leaw -16(%rax,%rbx), %cx
-# CHECK-NEXT: 2 2 0.25 leal -16(%rax,%rbx), %ecx
-# CHECK-NEXT: 2 2 0.25 leaq -16(%rax,%rbx), %rcx
+# CHECK-NEXT: 4 3 0.25 leal -16(%rax,%rbx), %ecx
+# CHECK-NEXT: 4 3 0.25 leaq -16(%rax,%rbx), %rcx
# CHECK-NEXT: 2 2 1.00 leaw -16(%eax,%ebx,2), %cx
-# CHECK-NEXT: 2 2 0.25 leal -16(%eax,%ebx,2), %ecx
-# CHECK-NEXT: 2 2 0.25 leaq -16(%eax,%ebx,2), %rcx
+# CHECK-NEXT: 4 3 0.25 leal -16(%eax,%ebx,2), %ecx
+# CHECK-NEXT: 4 3 0.25 leaq -16(%eax,%ebx,2), %rcx
# CHECK-NEXT: 2 2 1.00 leaw -16(%rax,%rbx,2), %cx
-# CHECK-NEXT: 2 2 0.25 leal -16(%rax,%rbx,2), %ecx
-# CHECK-NEXT: 2 2 0.25 leaq -16(%rax,%rbx,2), %rcx
+# CHECK-NEXT: 4 3 0.25 leal -16(%rax,%rbx,2), %ecx
+# CHECK-NEXT: 4 3 0.25 leaq -16(%rax,%rbx,2), %rcx
# CHECK-NEXT: 2 2 1.00 leaw 1024, %cx
# CHECK-NEXT: 1 1 0.33 leal 1024, %ecx
# CHECK-NEXT: 1 1 0.33 leaq 1024, %rcx
@@ -260,29 +260,29 @@ lea 1024(%rax, %rbx, 2), %rcx
# CHECK-NEXT: 1 1 0.33 leal 1024(,%rbx), %ecx
# CHECK-NEXT: 1 1 0.33 leaq 1024(,%rbx), %rcx
# CHECK-NEXT: 2 2 1.00 leaw 1024(,%ebx,2), %cx
-# CHECK-NEXT: 2 2 0.25 leal 1024(,%ebx,2), %ecx
-# CHECK-NEXT: 2 2 0.25 leaq 1024(,%ebx,2), %rcx
+# CHECK-NEXT: 4 3 0.25 leal 1024(,%ebx,2), %ecx
+# CHECK-NEXT: 4 3 0.25 leaq 1024(,%ebx,2), %rcx
# CHECK-NEXT: 2 2 1.00 leaw 1024(,%rbx,2), %cx
-# CHECK-NEXT: 2 2 0.25 leal 1024(,%rbx,2), %ecx
-# CHECK-NEXT: 2 2 0.25 leaq 1024(,%rbx,2), %rcx
+# CHECK-NEXT: 4 3 0.25 leal 1024(,%rbx,2), %ecx
+# CHECK-NEXT: 4 3 0.25 leaq 1024(,%rbx,2), %rcx
# CHECK-NEXT: 2 2 1.00 leaw 1024(%eax,%ebx), %cx
-# CHECK-NEXT: 2 2 0.25 leal 1024(%eax,%ebx), %ecx
-# CHECK-NEXT: 2 2 0.25 leaq 1024(%eax,%ebx), %rcx
+# CHECK-NEXT: 4 3 0.25 leal 1024(%eax,%ebx), %ecx
+# CHECK-NEXT: 4 3 0.25 leaq 1024(%eax,%ebx), %rcx
# CHECK-NEXT: 2 2 1.00 leaw 1024(%rax,%rbx), %cx
-# CHECK-NEXT: 2 2 0.25 leal 1024(%rax,%rbx), %ecx
-# CHECK-NEXT: 2 2 0.25 leaq 1024(%rax,%rbx), %rcx
+# CHECK-NEXT: 4 3 0.25 leal 1024(%rax,%rbx), %ecx
+# CHECK-NEXT: 4 3 0.25 leaq 1024(%rax,%rbx), %rcx
# CHECK-NEXT: 2 2 1.00 leaw 1024(%eax,%ebx), %cx
-# CHECK-NEXT: 2 2 0.25 leal 1024(%eax,%ebx), %ecx
-# CHECK-NEXT: 2 2 0.25 leaq 1024(%eax,%ebx), %rcx
+# CHECK-NEXT: 4 3 0.25 leal 1024(%eax,%ebx), %ecx
+# CHECK-NEXT: 4 3 0.25 leaq 1024(%eax,%ebx), %rcx
# CHECK-NEXT: 2 2 1.00 leaw 1024(%rax,%rbx), %cx
-# CHECK-NEXT: 2 2 0.25 leal 1024(%rax,%rbx), %ecx
-# CHECK-NEXT: 2 2 0.25 leaq 1024(%rax,%rbx), %rcx
+# CHECK-NEXT: 4 3 0.25 leal 1024(%rax,%rbx), %ecx
+# CHECK-NEXT: 4 3 0.25 leaq 1024(%rax,%rbx), %rcx
# CHECK-NEXT: 2 2 1.00 leaw 1024(%eax,%ebx,2), %cx
-# CHECK-NEXT: 2 2 0.25 leal 1024(%eax,%ebx,2), %ecx
-# CHECK-NEXT: 2 2 0.25 leaq 1024(%eax,%ebx,2), %rcx
+# CHECK-NEXT: 4 3 0.25 leal 1024(%eax,%ebx,2), %ecx
+# CHECK-NEXT: 4 3 0.25 leaq 1024(%eax,%ebx,2), %rcx
# CHECK-NEXT: 2 2 1.00 leaw 1024(%rax,%rbx,2), %cx
-# CHECK-NEXT: 2 2 0.25 leal 1024(%rax,%rbx,2), %ecx
-# CHECK-NEXT: 2 2 0.25 leaq 1024(%rax,%rbx,2), %rcx
+# CHECK-NEXT: 4 3 0.25 leal 1024(%rax,%rbx,2), %ecx
+# CHECK-NEXT: 4 3 0.25 leaq 1024(%rax,%rbx,2), %rcx
# CHECK: Resources:
# CHECK-NEXT: [0] - Zn4AGU0
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-pclmul.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-pclmul.s
index a36fb2aabe486..fc2bc8e21bf14 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-pclmul.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-pclmul.s
@@ -13,8 +13,8 @@ pclmulqdq $11, (%rax), %xmm2
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 4 4 2.00 pclmulqdq $11, %xmm0, %xmm2
-# CHECK-NEXT: 4 11 2.00 * pclmulqdq $11, (%rax), %xmm2
+# CHECK-NEXT: 4 4 1.50 pclmulqdq $11, %xmm0, %xmm2
+# CHECK-NEXT: 4 11 1.50 * pclmulqdq $11, (%rax), %xmm2
# CHECK: Resources:
# CHECK-NEXT: [0] - Zn4AGU0
@@ -43,9 +43,9 @@ pclmulqdq $11, (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - -
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - pclmulqdq $11, %xmm0, %xmm2
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pclmulqdq $11, (%rax), %xmm2
+# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - - - - - - - - - - pclmulqdq $11, %xmm0, %xmm2
+# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - pclmulqdq $11, (%rax), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-sse42.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-sse42.s
index 015d37e3e6296..ae608354e2a6f 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-sse42.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-sse42.s
@@ -52,12 +52,12 @@ pcmpgtq (%rax), %xmm2
# CHECK-NEXT: 1 7 1.00 * crc32q (%rax), %rcx
# CHECK-NEXT: 8 6 3.00 pcmpestri $1, %xmm0, %xmm2
# CHECK-NEXT: 12 13 3.00 * pcmpestri $1, (%rax), %xmm2
-# CHECK-NEXT: 7 6 3.00 pcmpestrm $1, %xmm0, %xmm2
-# CHECK-NEXT: 12 13 3.00 * pcmpestrm $1, (%rax), %xmm2
+# CHECK-NEXT: 7 7 3.00 pcmpestrm $1, %xmm0, %xmm2
+# CHECK-NEXT: 12 14 3.00 * pcmpestrm $1, (%rax), %xmm2
# CHECK-NEXT: 4 2 2.00 pcmpistri $1, %xmm0, %xmm2
# CHECK-NEXT: 4 9 2.00 * pcmpistri $1, (%rax), %xmm2
-# CHECK-NEXT: 3 6 2.00 pcmpistrm $1, %xmm0, %xmm2
-# CHECK-NEXT: 4 13 2.00 * pcmpistrm $1, (%rax), %xmm2
+# CHECK-NEXT: 3 7 2.00 pcmpistrm $1, %xmm0, %xmm2
+# CHECK-NEXT: 4 14 2.00 * pcmpistrm $1, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.25 pcmpgtq %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.50 * pcmpgtq (%rax), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-vpclmulqdq.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-vpclmulqdq.s
index 55a36d0f1ea09..dca470338b5a4 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-vpclmulqdq.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-vpclmulqdq.s
@@ -13,8 +13,8 @@ vpclmulqdq $11, (%rax), %ymm1, %ymm3
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 4 4 2.00 vpclmulqdq $11, %ymm0, %ymm1, %ymm3
-# CHECK-NEXT: 4 11 2.00 * vpclmulqdq $11, (%rax), %ymm1, %ymm3
+# CHECK-NEXT: 4 4 1.50 vpclmulqdq $11, %ymm0, %ymm1, %ymm3
+# CHECK-NEXT: 4 11 1.50 * vpclmulqdq $11, (%rax), %ymm1, %ymm3
# CHECK: Resources:
# CHECK-NEXT: [0] - Zn4AGU0
@@ -43,9 +43,9 @@ vpclmulqdq $11, (%rax), %ymm1, %ymm3
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: - - - - - - - - 4.00 4.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - -
+# CHECK-NEXT: - - - - - - - - 3.00 3.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - - - - - - - - - - - - vpclmulqdq $11, %ymm0, %ymm1, %ymm3
-# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpclmulqdq $11, (%rax), %ymm1, %ymm3
+# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - - - - - - - - - - - - vpclmulqdq $11, %ymm0, %ymm1, %ymm3
+# CHECK-NEXT: - - - - - - - - 1.50 1.50 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpclmulqdq $11, (%rax), %ymm1, %ymm3
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-x86_64.s
index 9c5b4e45896de..886d9c6930418 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-x86_64.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-x86_64.s
@@ -1173,18 +1173,18 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 6 0.67 * * andq %rsi, (%rax)
# CHECK-NEXT: 1 6 0.67 * * lock andq %rsi, (%rax)
# CHECK-NEXT: 1 5 0.33 * andq (%rax), %rdi
-# CHECK-NEXT: 6 1 1.00 bsfw %si, %di
-# CHECK-NEXT: 6 1 1.00 bsrw %si, %di
-# CHECK-NEXT: 7 5 1.00 * bsfw (%rax), %di
-# CHECK-NEXT: 7 5 1.00 * bsrw (%rax), %di
-# CHECK-NEXT: 6 1 1.00 bsfl %esi, %edi
-# CHECK-NEXT: 6 1 1.00 bsrl %esi, %edi
-# CHECK-NEXT: 7 5 1.00 * bsfl (%rax), %edi
-# CHECK-NEXT: 7 5 1.00 * bsrl (%rax), %edi
-# CHECK-NEXT: 6 1 1.00 bsfq %rsi, %rdi
-# CHECK-NEXT: 6 1 1.00 bsrq %rsi, %rdi
-# CHECK-NEXT: 7 5 1.00 * bsfq (%rax), %rdi
-# CHECK-NEXT: 7 5 1.00 * bsrq (%rax), %rdi
+# CHECK-NEXT: 1 1 1.00 bsfw %si, %di
+# CHECK-NEXT: 1 1 1.00 bsrw %si, %di
+# CHECK-NEXT: 2 5 1.00 * bsfw (%rax), %di
+# CHECK-NEXT: 2 5 1.00 * bsrw (%rax), %di
+# CHECK-NEXT: 1 1 1.00 bsfl %esi, %edi
+# CHECK-NEXT: 1 1 1.00 bsrl %esi, %edi
+# CHECK-NEXT: 2 5 1.00 * bsfl (%rax), %edi
+# CHECK-NEXT: 2 5 1.00 * bsrl (%rax), %edi
+# CHECK-NEXT: 1 1 1.00 bsfq %rsi, %rdi
+# CHECK-NEXT: 1 1 1.00 bsrq %rsi, %rdi
+# CHECK-NEXT: 2 5 1.00 * bsfq (%rax), %rdi
+# CHECK-NEXT: 2 5 1.00 * bsrq (%rax), %rdi
# CHECK-NEXT: 1 1 0.25 bswapl %eax
# CHECK-NEXT: 1 1 0.25 bswapq %rax
# CHECK-NEXT: 1 1 0.50 btw %si, %di
@@ -1321,23 +1321,23 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 1 0.25 decq %rdi
# CHECK-NEXT: 1 6 0.67 * * decq (%rax)
# CHECK-NEXT: 1 6 0.67 * * lock decq (%rax)
-# CHECK-NEXT: 2 10 10.00 U divb %dil
-# CHECK-NEXT: 2 14 10.00 * U divb (%rax)
-# CHECK-NEXT: 2 11 11.00 U divw %si
-# CHECK-NEXT: 2 15 11.00 * U divw (%rax)
-# CHECK-NEXT: 2 13 13.00 U divl %edx
-# CHECK-NEXT: 2 17 13.00 * U divl (%rax)
-# CHECK-NEXT: 2 17 17.00 U divq %rcx
-# CHECK-NEXT: 2 21 17.00 * U divq (%rax)
+# CHECK-NEXT: 2 9 9.00 U divb %dil
+# CHECK-NEXT: 2 13 9.00 * U divb (%rax)
+# CHECK-NEXT: 2 10 10.00 U divw %si
+# CHECK-NEXT: 2 14 10.00 * U divw (%rax)
+# CHECK-NEXT: 2 12 12.00 U divl %edx
+# CHECK-NEXT: 2 16 12.00 * U divl (%rax)
+# CHECK-NEXT: 2 18 18.00 U divq %rcx
+# CHECK-NEXT: 2 22 18.00 * U divq (%rax)
# CHECK-NEXT: 100 100 25.00 U enter $7, $4095
-# CHECK-NEXT: 2 10 10.00 U idivb %dil
-# CHECK-NEXT: 2 14 10.00 * U idivb (%rax)
-# CHECK-NEXT: 2 11 11.00 U idivw %si
-# CHECK-NEXT: 2 15 11.00 * U idivw (%rax)
-# CHECK-NEXT: 2 13 13.00 U idivl %edx
-# CHECK-NEXT: 2 17 13.00 * U idivl (%rax)
-# CHECK-NEXT: 2 17 17.00 U idivq %rcx
-# CHECK-NEXT: 2 21 17.00 * U idivq (%rax)
+# CHECK-NEXT: 2 9 9.00 U idivb %dil
+# CHECK-NEXT: 2 13 9.00 * U idivb (%rax)
+# CHECK-NEXT: 2 10 10.00 U idivw %si
+# CHECK-NEXT: 2 14 10.00 * U idivw (%rax)
+# CHECK-NEXT: 2 12 12.00 U idivl %edx
+# CHECK-NEXT: 2 16 12.00 * U idivl (%rax)
+# CHECK-NEXT: 2 18 18.00 U idivq %rcx
+# CHECK-NEXT: 2 22 18.00 * U idivq (%rax)
# CHECK-NEXT: 1 3 3.00 imulb %dil
# CHECK-NEXT: 1 7 3.00 * imulb (%rax)
# CHECK-NEXT: 3 3 3.00 imulw %di
@@ -1891,12 +1891,12 @@ xorq (%rax), %rdi
# CHECK-NEXT: 1 5 0.67 * * xaddq %rax, (%rbx)
# CHECK-NEXT: 1 5 0.67 * * lock xaddq %rax, (%rbx)
# CHECK-NEXT: 2 1 0.50 xchgb %bl, %cl
-# CHECK-NEXT: 5 7 0.50 * * xchgb %bl, (%rbx)
-# CHECK-NEXT: 5 7 0.50 * * lock xchgb %bl, (%rbx)
+# CHECK-NEXT: 2 7 0.50 * * xchgb %bl, (%rbx)
+# CHECK-NEXT: 2 7 0.50 * * lock xchgb %bl, (%rbx)
# CHECK-NEXT: 2 1 0.50 xchgw %bx, %ax
# CHECK-NEXT: 2 1 0.50 xchgw %bx, %cx
-# CHECK-NEXT: 5 7 0.50 * * xchgw %ax, (%rbx)
-# CHECK-NEXT: 5 7 0.50 * * lock xchgw %ax, (%rbx)
+# CHECK-NEXT: 2 7 0.50 * * xchgw %ax, (%rbx)
+# CHECK-NEXT: 2 7 0.50 * * lock xchgw %ax, (%rbx)
# CHECK-NEXT: 2 0 0.33 xchgl %ebx, %eax
# CHECK-NEXT: 2 0 0.33 xchgl %ebx, %ecx
# CHECK-NEXT: 2 6 0.50 * * xchgl %eax, (%rbx)
@@ -1975,7 +1975,7 @@ xorq (%rax), %rdi
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: 259.00 259.00 259.00 1733.00 1865.50 1775.50 1529.50 1.50 - - - - - - - 259.00 259.00 259.00 151.67 151.67 151.67 161.00 161.00
+# CHECK-NEXT: 259.00 259.00 259.00 1725.00 1865.50 1775.50 1529.50 1.50 - - - - - - - 259.00 259.00 259.00 151.67 151.67 151.67 161.00 161.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
@@ -2266,23 +2266,23 @@ xorq (%rax), %rdi
# CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - decq %rdi
# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 decq (%rax)
# CHECK-NEXT: 0.67 0.67 0.67 0.25 0.25 0.25 0.25 - - - - - - - - 0.67 0.67 0.67 0.33 0.33 0.33 0.50 0.50 lock decq (%rax)
-# CHECK-NEXT: - - - 10.00 - - - - - - - - - - - - - - - - - - - divb %dil
-# CHECK-NEXT: 0.33 0.33 0.33 10.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - divb (%rax)
-# CHECK-NEXT: - - - 11.00 - - - - - - - - - - - - - - - - - - - divw %si
-# CHECK-NEXT: 0.33 0.33 0.33 11.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - divw (%rax)
-# CHECK-NEXT: - - - 13.00 - - - - - - - - - - - - - - - - - - - divl %edx
-# CHECK-NEXT: 0.33 0.33 0.33 13.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - divl (%rax)
-# CHECK-NEXT: - - - 17.00 - - - - - - - - - - - - - - - - - - - divq %rcx
-# CHECK-NEXT: 0.33 0.33 0.33 17.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - divq (%rax)
+# CHECK-NEXT: - - - 9.00 - - - - - - - - - - - - - - - - - - - divb %dil
+# CHECK-NEXT: 0.33 0.33 0.33 9.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - divb (%rax)
+# CHECK-NEXT: - - - 10.00 - - - - - - - - - - - - - - - - - - - divw %si
+# CHECK-NEXT: 0.33 0.33 0.33 10.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - divw (%rax)
+# CHECK-NEXT: - - - 12.00 - - - - - - - - - - - - - - - - - - - divl %edx
+# CHECK-NEXT: 0.33 0.33 0.33 12.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - divl (%rax)
+# CHECK-NEXT: - - - 18.00 - - - - - - - - - - - - - - - - - - - divq %rcx
+# CHECK-NEXT: 0.33 0.33 0.33 18.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - divq (%rax)
# CHECK-NEXT: - - - 25.00 25.00 25.00 25.00 - - - - - - - - - - - - - - - - enter $7, $4095
-# CHECK-NEXT: - - - 10.00 - - - - - - - - - - - - - - - - - - - idivb %dil
-# CHECK-NEXT: 0.33 0.33 0.33 10.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - idivb (%rax)
-# CHECK-NEXT: - - - 11.00 - - - - - - - - - - - - - - - - - - - idivw %si
-# CHECK-NEXT: 0.33 0.33 0.33 11.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - idivw (%rax)
-# CHECK-NEXT: - - - 13.00 - - - - - - - - - - - - - - - - - - - idivl %edx
-# CHECK-NEXT: 0.33 0.33 0.33 13.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - idivl (%rax)
-# CHECK-NEXT: - - - 17.00 - - - - - - - - - - - - - - - - - - - idivq %rcx
-# CHECK-NEXT: 0.33 0.33 0.33 17.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - idivq (%rax)
+# CHECK-NEXT: - - - 9.00 - - - - - - - - - - - - - - - - - - - idivb %dil
+# CHECK-NEXT: 0.33 0.33 0.33 9.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - idivb (%rax)
+# CHECK-NEXT: - - - 10.00 - - - - - - - - - - - - - - - - - - - idivw %si
+# CHECK-NEXT: 0.33 0.33 0.33 10.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - idivw (%rax)
+# CHECK-NEXT: - - - 12.00 - - - - - - - - - - - - - - - - - - - idivl %edx
+# CHECK-NEXT: 0.33 0.33 0.33 12.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - idivl (%rax)
+# CHECK-NEXT: - - - 18.00 - - - - - - - - - - - - - - - - - - - idivq %rcx
+# CHECK-NEXT: 0.33 0.33 0.33 18.00 - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - idivq (%rax)
# CHECK-NEXT: - - - - 3.00 - - - - - - - - - - - - - - - - - - imulb %dil
# CHECK-NEXT: 0.33 0.33 0.33 - 3.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - imulb (%rax)
# CHECK-NEXT: - - - - 3.00 - - - - - - - - - - - - - - - - - - imulw %di
>From de26eec10021d441980eed7bb8be6878bc5086e5 Mon Sep 17 00:00:00 2001
From: NexusXe <andastrike at gmail.com>
Date: Wed, 1 Oct 2025 13:40:34 -0500
Subject: [PATCH 15/20] revert LEA changes
---
llvm/lib/Target/X86/X86ScheduleZnver4.td | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index 9e3939b60408f..3d6121ff1655f 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -586,11 +586,10 @@ def : InstRW<[Zn4WriteADC8mr_SBB8mr], (instrs ADC8mr, SBB8mr)>;
defm : Zn4WriteResInt<WriteLEA, [Zn4AGU012], 1, [1], 1>; // LEA instructions can't fold loads.
// This write is used for slow LEA instructions.
-// values from uops.info
def Zn4Write3OpsLEA : SchedWriteRes<[Zn4ALU0123]> {
- let Latency = 3;
+ let Latency = 2;
let ReleaseAtCycles = [1];
- let NumMicroOps = 4;
+ let NumMicroOps = 2;
}
// On Znver4, a slow LEA is either a 3Ops LEA (base, index, offset),
>From ceec220213d7df57e1645d095f0f36fba40be6ff Mon Sep 17 00:00:00 2001
From: NexusXe <andastrike at gmail.com>
Date: Wed, 1 Oct 2025 13:41:06 -0500
Subject: [PATCH 16/20] CMPXCHG8B better TP
---
llvm/lib/Target/X86/X86ScheduleZnver4.td | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index 3d6121ff1655f..c0698135fce6a 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -661,7 +661,7 @@ def : InstRW<[Zn4WriteCMPXCHG8rm_LCMPXCHG8], (instrs CMPXCHG8rm, LCMPXCHG8)>;
def Zn4WriteCMPXCHG8B : SchedWriteRes<[Zn4ALU0123]> {
let Latency = 3; // FIXME: not from llvm-exegesis
- let ReleaseAtCycles = [24];
+ let ReleaseAtCycles = [20];
let NumMicroOps = 15;
}
def : InstRW<[Zn4WriteCMPXCHG8B], (instrs CMPXCHG8B)>;
>From 3f10cb456d9efe098c509f56d8414013af10d127 Mon Sep 17 00:00:00 2001
From: NexusXe <andastrike at gmail.com>
Date: Wed, 1 Oct 2025 14:04:02 -0500
Subject: [PATCH 17/20] VPERMD uops
---
llvm/lib/Target/X86/X86ScheduleZnver4.td | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index c0698135fce6a..4b136f22b44ca 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -1408,8 +1408,8 @@ def : InstRW<[Zn4WriteVPERMYri], (instrs VPERMPDYri, VPERMQYri)>;
def Zn4WriteVPERMPDYmi : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteVPERMYri.Latency);
- let ReleaseAtCycles = [1, 1, 2];
- let NumMicroOps = !add(Zn4WriteVPERMYri.NumMicroOps, 1);
+ let ReleaseAtCycles = [1, 1, 1];
+ let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVPERMPDYmi], (instrs VPERMPDYmi)>;
@@ -1422,8 +1422,8 @@ def : InstRW<[Zn4WriteVPERMDYrr], (instrs VPERMDYrr)>;
def Zn4WriteVPERMYm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteVPERMDYrr.Latency);
- let ReleaseAtCycles = [1, 1, 2];
- let NumMicroOps = !add(Zn4WriteVPERMDYrr.NumMicroOps, 0);
+ let ReleaseAtCycles = [1, 1, 1];
+ let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVPERMYm], (instrs VPERMQYmi, VPERMDYrm)>;
>From 02d554c5256b11776b18eef663e3acd1ccb203e8 Mon Sep 17 00:00:00 2001
From: NexusXe <andastrike at gmail.com>
Date: Wed, 1 Oct 2025 14:08:52 -0500
Subject: [PATCH 18/20] fix CMPXCHG16B
---
llvm/lib/Target/X86/X86ScheduleZnver4.td | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index 4b136f22b44ca..3c97f5bec52b7 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -667,8 +667,8 @@ def Zn4WriteCMPXCHG8B : SchedWriteRes<[Zn4ALU0123]> {
def : InstRW<[Zn4WriteCMPXCHG8B], (instrs CMPXCHG8B)>;
def Zn4WriteCMPXCHG16B_LCMPXCHG16B : SchedWriteRes<[Zn4ALU0123]> {
- let Latency = 4; // FIXME: not from llvm-exegesis
- let ReleaseAtCycles = [59];
+ let Latency = 2; // FIXME: not from llvm-exegesis
+ let ReleaseAtCycles = [40];
let NumMicroOps = 26;
}
def : InstRW<[Zn4WriteCMPXCHG16B_LCMPXCHG16B], (instrs CMPXCHG16B, LCMPXCHG16B)>;
>From e4c785294443d0abbff57712bbe2007429eed2c1 Mon Sep 17 00:00:00 2001
From: NexusXe <andastrike at gmail.com>
Date: Wed, 1 Oct 2025 14:13:21 -0500
Subject: [PATCH 19/20] VALIGN has different latency depending on width
---
llvm/lib/Target/X86/X86ScheduleZnver4.td | 28 +++++++++++++++++++-----
1 file changed, 22 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index 3c97f5bec52b7..ac4d31de8dbfe 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -1109,15 +1109,31 @@ def Zn4WriteVecOpMaskKRMov : SchedWriteRes<[Zn4FPOpMask4]> {
}
def : InstRW<[Zn4WriteVecOpMaskKRMov], (instrs KMOVBkr, KMOVDkr, KMOVQkr, KMOVWkr)>;
-def Zn4WriteVecALU2Slow : SchedWriteRes<[Zn4FPVAdd12]> {
- // TODO: All align instructions are expected to be of 4 cycle latency
- let Latency = 4;
+// 128-bit VALIGN
+def Zn4WriteXMMVecALU2Slow : SchedWriteRes<[Zn4FPVAdd12]> {
+ let Latency = 2;
+ let ReleaseAtCycles = [1];
+ let NumMicroOps = 1;
+}
+
+// 256-bit VALIGN
+def Zn4WriteYMMVecALU2Slow : SchedWriteRes<[Zn4FPVAdd12]> {
+ let Latency = 3;
let ReleaseAtCycles = [1];
let NumMicroOps = 1;
}
-def : InstRW<[Zn4WriteVecALU2Slow], (instrs VALIGNDZrri, VALIGNDZ128rri, VALIGNDZ256rri,
- VALIGNQZrri, VALIGNQZ128rri, VALIGNQZ256rri)
- >;
+
+// 512-bit VALIGN
+def Zn4WriteZMMVecALU2Slow : SchedWriteRes<[Zn4FPVAdd12]> {
+ let Latency = 4;
+ let ReleaseAtCycles = [2];
+ let NumMicroOps = 1;
+}
+
+def : InstRW<[Zn4WriteXMMVecALU2Slow], (instrs VALIGNDZ128rri, VALIGNQZ128rri)>; // Z128 opcodes = 128-bit (XMM) forms
+def : InstRW<[Zn4WriteYMMVecALU2Slow], (instrs VALIGNDZ256rri, VALIGNQZ256rri)>; // Z256 opcodes = 256-bit (YMM) forms
+def : InstRW<[Zn4WriteZMMVecALU2Slow], (instrs VALIGNDZrri, VALIGNQZrri)>; // bare Z opcodes = 512-bit (ZMM) forms
+
defm : Zn4WriteResYMMPair<WriteVecALUY, [Zn4FPVAdd0123], 1, [1], 1>; // Vector integer ALU op, no logicals (YMM).
def Zn4WriteVecALUYSlow : SchedWriteRes<[Zn4FPVAdd01]> {
>From d300f5ee38bd0fa242533fa5407b890a0bbba2a2 Mon Sep 17 00:00:00 2001
From: NexusXe <andastrike at gmail.com>
Date: Wed, 1 Oct 2025 14:15:38 -0500
Subject: [PATCH 20/20] update resource files
---
.../llvm-mca/X86/Znver4/resources-avx2.s | 14 ++--
.../llvm-mca/X86/Znver4/resources-avx512.s | 4 +-
.../llvm-mca/X86/Znver4/resources-avx512vl.s | 14 ++--
.../llvm-mca/X86/Znver4/resources-cmpxchg.s | 18 ++---
.../tools/llvm-mca/X86/Znver4/resources-lea.s | 80 +++++++++----------
5 files changed, 65 insertions(+), 65 deletions(-)
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s
index 2851632869865..6c8fac4566498 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s
@@ -561,13 +561,13 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vperm2i128 $1, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 1.00 * vperm2i128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 1.00 vpermd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 11 2.00 * vpermd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 11 1.00 * vpermd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 1.00 vpermpd $1, %ymm0, %ymm2
-# CHECK-NEXT: 2 11 2.00 * vpermpd $1, (%rax), %ymm2
+# CHECK-NEXT: 1 11 1.00 * vpermpd $1, (%rax), %ymm2
# CHECK-NEXT: 1 4 1.00 vpermps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 11 1.00 * vpermps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 1.00 vpermq $1, %ymm0, %ymm2
-# CHECK-NEXT: 1 11 2.00 * vpermq $1, (%rax), %ymm2
+# CHECK-NEXT: 1 11 1.00 * vpermq $1, (%rax), %ymm2
# CHECK-NEXT: 1 5 0.33 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 1 5 0.33 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 1 5 0.33 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
@@ -789,7 +789,7 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: 6.67 6.67 6.67 - - - - - 93.75 131.75 92.25 36.25 80.50 80.50 29.00 52.33 52.33 52.33 50.67 50.67 50.67 2.50 2.50
+# CHECK-NEXT: 6.67 6.67 6.67 - - - - - 93.75 128.75 92.25 36.25 80.50 80.50 29.00 52.33 52.33 52.33 50.67 50.67 50.67 2.50 2.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
@@ -894,13 +894,13 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vperm2i128 $1, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vperm2i128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpermd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 2.00 - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 1.00 - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpermpd $1, %ymm0, %ymm2
-# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 2.00 - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermpd $1, (%rax), %ymm2
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 1.00 - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermpd $1, (%rax), %ymm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpermps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 1.00 - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpermq $1, %ymm0, %ymm2
-# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 2.00 - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermq $1, (%rax), %ymm2
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 1.00 - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpermq $1, (%rax), %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s
index 72d7de3353346..14b8e5f36c666 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s
@@ -1207,7 +1207,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 1 3 1.00 vaddps %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 10 1.00 * vaddps (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 10 1.00 * vaddps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
-# CHECK-NEXT: 1 4 0.50 valignd $1, %zmm16, %zmm17, %zmm19
+# CHECK-NEXT: 1 2 0.50 valignd $1, %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 1 8 1.00 * valignd $1, (%rax), %zmm17, %zmm19
# CHECK-NEXT: 1 8 1.00 * valignd $1, (%rax){1to16}, %zmm17, %zmm19
# CHECK-NEXT: 1 1 1.00 valignd $1, %zmm16, %zmm17, %zmm19 {%k1}
@@ -1216,7 +1216,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 1 1 1.00 valignd $1, %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 8 1.00 * valignd $1, (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 8 1.00 * valignd $1, (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
-# CHECK-NEXT: 1 4 0.50 valignq $1, %zmm16, %zmm17, %zmm19
+# CHECK-NEXT: 1 2 0.50 valignq $1, %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 1 8 1.00 * valignq $1, (%rax), %zmm17, %zmm19
# CHECK-NEXT: 1 8 1.00 * valignq $1, (%rax){1to8}, %zmm17, %zmm19
# CHECK-NEXT: 1 1 1.00 valignq $1, %zmm16, %zmm17, %zmm19 {%k1}
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s
index 552b3e40284b9..ead609e33da4d 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512vl.s
@@ -1948,7 +1948,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: 1 3 0.50 vaddps %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 1 10 0.50 * vaddps (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 1 10 0.50 * vaddps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
-# CHECK-NEXT: 1 4 0.50 valignd $1, %xmm16, %xmm17, %xmm19
+# CHECK-NEXT: 1 3 0.50 valignd $1, %xmm16, %xmm17, %xmm19
# CHECK-NEXT: 1 8 0.50 * valignd $1, (%rax), %xmm17, %xmm19
# CHECK-NEXT: 1 8 0.50 * valignd $1, (%rax){1to4}, %xmm17, %xmm19
# CHECK-NEXT: 1 1 0.50 valignd $1, %xmm16, %xmm17, %xmm19 {%k1}
@@ -1957,7 +1957,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: 1 1 0.50 valignd $1, %xmm16, %xmm17, %xmm19 {%k1} {z}
# CHECK-NEXT: 1 8 0.50 * valignd $1, (%rax), %xmm17, %xmm19 {%k1} {z}
# CHECK-NEXT: 1 8 0.50 * valignd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: 1 4 0.50 valignd $1, %ymm16, %ymm17, %ymm19
+# CHECK-NEXT: 1 4 1.00 valignd $1, %ymm16, %ymm17, %ymm19
# CHECK-NEXT: 1 8 0.50 * valignd $1, (%rax), %ymm17, %ymm19
# CHECK-NEXT: 1 8 0.50 * valignd $1, (%rax){1to8}, %ymm17, %ymm19
# CHECK-NEXT: 1 1 0.50 valignd $1, %ymm16, %ymm17, %ymm19 {%k1}
@@ -1966,7 +1966,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: 1 1 0.50 valignd $1, %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 1 8 0.50 * valignd $1, (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 1 8 0.50 * valignd $1, (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
-# CHECK-NEXT: 1 4 0.50 valignq $1, %xmm16, %xmm17, %xmm19
+# CHECK-NEXT: 1 3 0.50 valignq $1, %xmm16, %xmm17, %xmm19
# CHECK-NEXT: 1 8 0.50 * valignq $1, (%rax), %xmm17, %xmm19
# CHECK-NEXT: 1 8 0.50 * valignq $1, (%rax){1to2}, %xmm17, %xmm19
# CHECK-NEXT: 1 1 0.50 valignq $1, %xmm16, %xmm17, %xmm19 {%k1}
@@ -1975,7 +1975,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: 1 1 0.50 valignq $1, %xmm16, %xmm17, %xmm19 {%k1} {z}
# CHECK-NEXT: 1 8 0.50 * valignq $1, (%rax), %xmm17, %xmm19 {%k1} {z}
# CHECK-NEXT: 1 8 0.50 * valignq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: 1 4 0.50 valignq $1, %ymm16, %ymm17, %ymm19
+# CHECK-NEXT: 1 4 1.00 valignq $1, %ymm16, %ymm17, %ymm19
# CHECK-NEXT: 1 8 0.50 * valignq $1, (%rax), %ymm17, %ymm19
# CHECK-NEXT: 1 8 0.50 * valignq $1, (%rax){1to4}, %ymm17, %ymm19
# CHECK-NEXT: 1 1 0.50 valignq $1, %ymm16, %ymm17, %ymm19 {%k1}
@@ -3614,7 +3614,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: 10.67 10.67 10.67 - - - - - 208.00 1083.00 636.50 261.50 509.50 509.50 32.00 355.67 355.67 355.67 334.33 334.33 334.33 32.00 32.00
+# CHECK-NEXT: 10.67 10.67 10.67 - - - - - 208.00 1084.00 637.50 261.50 509.50 509.50 32.00 355.67 355.67 355.67 334.33 334.33 334.33 32.00 32.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
@@ -3663,7 +3663,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - valignd $1, %xmm16, %xmm17, %xmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - valignd $1, (%rax), %xmm17, %xmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - valignd $1, (%rax){1to4}, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - valignd $1, %ymm16, %ymm17, %ymm19
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - valignd $1, %ymm16, %ymm17, %ymm19
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - valignd $1, (%rax), %ymm17, %ymm19
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - valignd $1, (%rax){1to8}, %ymm17, %ymm19
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - valignd $1, %ymm16, %ymm17, %ymm19 {%k1}
@@ -3681,7 +3681,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - valignq $1, %xmm16, %xmm17, %xmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - valignq $1, (%rax), %xmm17, %xmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - valignq $1, (%rax){1to2}, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - valignq $1, %ymm16, %ymm17, %ymm19
+# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - valignq $1, %ymm16, %ymm17, %ymm19
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - valignq $1, (%rax), %ymm17, %ymm19
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - valignq $1, (%rax){1to4}, %ymm17, %ymm19
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - valignq $1, %ymm16, %ymm17, %ymm19 {%k1}
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-cmpxchg.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-cmpxchg.s
index 903cca3b913b5..26a42fd9964b5 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-cmpxchg.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-cmpxchg.s
@@ -15,10 +15,10 @@ lock cmpxchg16b (%rax)
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 15 3 6.00 * * cmpxchg8b (%rax)
-# CHECK-NEXT: 26 4 14.75 * * cmpxchg16b (%rax)
-# CHECK-NEXT: 15 3 6.00 * * lock cmpxchg8b (%rax)
-# CHECK-NEXT: 26 4 14.75 * * lock cmpxchg16b (%rax)
+# CHECK-NEXT: 15 3 5.00 * * cmpxchg8b (%rax)
+# CHECK-NEXT: 26 2 10.00 * * cmpxchg16b (%rax)
+# CHECK-NEXT: 15 3 5.00 * * lock cmpxchg8b (%rax)
+# CHECK-NEXT: 26 2 10.00 * * lock cmpxchg16b (%rax)
# CHECK: Resources:
# CHECK-NEXT: [0] - Zn4AGU0
@@ -47,11 +47,11 @@ lock cmpxchg16b (%rax)
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: - - - 41.50 41.50 41.50 41.50 - - - - - - - - - - - - - - - -
+# CHECK-NEXT: - - - 30.00 30.00 30.00 30.00 - - - - - - - - - - - - - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
-# CHECK-NEXT: - - - 6.00 6.00 6.00 6.00 - - - - - - - - - - - - - - - - cmpxchg8b (%rax)
-# CHECK-NEXT: - - - 14.75 14.75 14.75 14.75 - - - - - - - - - - - - - - - - cmpxchg16b (%rax)
-# CHECK-NEXT: - - - 6.00 6.00 6.00 6.00 - - - - - - - - - - - - - - - - lock cmpxchg8b (%rax)
-# CHECK-NEXT: - - - 14.75 14.75 14.75 14.75 - - - - - - - - - - - - - - - - lock cmpxchg16b (%rax)
+# CHECK-NEXT: - - - 5.00 5.00 5.00 5.00 - - - - - - - - - - - - - - - - cmpxchg8b (%rax)
+# CHECK-NEXT: - - - 10.00 10.00 10.00 10.00 - - - - - - - - - - - - - - - - cmpxchg16b (%rax)
+# CHECK-NEXT: - - - 5.00 5.00 5.00 5.00 - - - - - - - - - - - - - - - - lock cmpxchg8b (%rax)
+# CHECK-NEXT: - - - 10.00 10.00 10.00 10.00 - - - - - - - - - - - - - - - - lock cmpxchg16b (%rax)
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-lea.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-lea.s
index 809b1bed70f08..d259949af3846 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-lea.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-lea.s
@@ -170,11 +170,11 @@ lea 1024(%rax, %rbx, 2), %rcx
# CHECK-NEXT: 1 1 0.33 leal (,%rbx), %ecx
# CHECK-NEXT: 1 1 0.33 leaq (,%rbx), %rcx
# CHECK-NEXT: 2 2 1.00 leaw (,%ebx,2), %cx
-# CHECK-NEXT: 4 3 0.25 leal (,%ebx,2), %ecx
-# CHECK-NEXT: 4 3 0.25 leaq (,%ebx,2), %rcx
+# CHECK-NEXT: 2 2 0.25 leal (,%ebx,2), %ecx
+# CHECK-NEXT: 2 2 0.25 leaq (,%ebx,2), %rcx
# CHECK-NEXT: 2 2 1.00 leaw (,%rbx,2), %cx
-# CHECK-NEXT: 4 3 0.25 leal (,%rbx,2), %ecx
-# CHECK-NEXT: 4 3 0.25 leaq (,%rbx,2), %rcx
+# CHECK-NEXT: 2 2 0.25 leal (,%rbx,2), %ecx
+# CHECK-NEXT: 2 2 0.25 leaq (,%rbx,2), %rcx
# CHECK-NEXT: 2 2 1.00 leaw (%eax,%ebx), %cx
# CHECK-NEXT: 1 1 0.33 leal (%eax,%ebx), %ecx
# CHECK-NEXT: 1 1 0.33 leaq (%eax,%ebx), %rcx
@@ -188,11 +188,11 @@ lea 1024(%rax, %rbx, 2), %rcx
# CHECK-NEXT: 1 1 0.33 leal (%rax,%rbx), %ecx
# CHECK-NEXT: 1 1 0.33 leaq (%rax,%rbx), %rcx
# CHECK-NEXT: 2 2 1.00 leaw (%eax,%ebx,2), %cx
-# CHECK-NEXT: 4 3 0.25 leal (%eax,%ebx,2), %ecx
-# CHECK-NEXT: 4 3 0.25 leaq (%eax,%ebx,2), %rcx
+# CHECK-NEXT: 2 2 0.25 leal (%eax,%ebx,2), %ecx
+# CHECK-NEXT: 2 2 0.25 leaq (%eax,%ebx,2), %rcx
# CHECK-NEXT: 2 2 1.00 leaw (%rax,%rbx,2), %cx
-# CHECK-NEXT: 4 3 0.25 leal (%rax,%rbx,2), %ecx
-# CHECK-NEXT: 4 3 0.25 leaq (%rax,%rbx,2), %rcx
+# CHECK-NEXT: 2 2 0.25 leal (%rax,%rbx,2), %ecx
+# CHECK-NEXT: 2 2 0.25 leaq (%rax,%rbx,2), %rcx
# CHECK-NEXT: 2 2 1.00 leaw -16, %cx
# CHECK-NEXT: 1 1 0.33 leal -16, %ecx
# CHECK-NEXT: 1 1 0.33 leaq -16, %rcx
@@ -215,29 +215,29 @@ lea 1024(%rax, %rbx, 2), %rcx
# CHECK-NEXT: 1 1 0.33 leal -16(,%rbx), %ecx
# CHECK-NEXT: 1 1 0.33 leaq -16(,%rbx), %rcx
# CHECK-NEXT: 2 2 1.00 leaw -16(,%ebx,2), %cx
-# CHECK-NEXT: 4 3 0.25 leal -16(,%ebx,2), %ecx
-# CHECK-NEXT: 4 3 0.25 leaq -16(,%ebx,2), %rcx
+# CHECK-NEXT: 2 2 0.25 leal -16(,%ebx,2), %ecx
+# CHECK-NEXT: 2 2 0.25 leaq -16(,%ebx,2), %rcx
# CHECK-NEXT: 2 2 1.00 leaw -16(,%rbx,2), %cx
-# CHECK-NEXT: 4 3 0.25 leal -16(,%rbx,2), %ecx
-# CHECK-NEXT: 4 3 0.25 leaq -16(,%rbx,2), %rcx
+# CHECK-NEXT: 2 2 0.25 leal -16(,%rbx,2), %ecx
+# CHECK-NEXT: 2 2 0.25 leaq -16(,%rbx,2), %rcx
# CHECK-NEXT: 2 2 1.00 leaw -16(%eax,%ebx), %cx
-# CHECK-NEXT: 4 3 0.25 leal -16(%eax,%ebx), %ecx
-# CHECK-NEXT: 4 3 0.25 leaq -16(%eax,%ebx), %rcx
+# CHECK-NEXT: 2 2 0.25 leal -16(%eax,%ebx), %ecx
+# CHECK-NEXT: 2 2 0.25 leaq -16(%eax,%ebx), %rcx
# CHECK-NEXT: 2 2 1.00 leaw -16(%rax,%rbx), %cx
-# CHECK-NEXT: 4 3 0.25 leal -16(%rax,%rbx), %ecx
-# CHECK-NEXT: 4 3 0.25 leaq -16(%rax,%rbx), %rcx
+# CHECK-NEXT: 2 2 0.25 leal -16(%rax,%rbx), %ecx
+# CHECK-NEXT: 2 2 0.25 leaq -16(%rax,%rbx), %rcx
# CHECK-NEXT: 2 2 1.00 leaw -16(%eax,%ebx), %cx
-# CHECK-NEXT: 4 3 0.25 leal -16(%eax,%ebx), %ecx
-# CHECK-NEXT: 4 3 0.25 leaq -16(%eax,%ebx), %rcx
+# CHECK-NEXT: 2 2 0.25 leal -16(%eax,%ebx), %ecx
+# CHECK-NEXT: 2 2 0.25 leaq -16(%eax,%ebx), %rcx
# CHECK-NEXT: 2 2 1.00 leaw -16(%rax,%rbx), %cx
-# CHECK-NEXT: 4 3 0.25 leal -16(%rax,%rbx), %ecx
-# CHECK-NEXT: 4 3 0.25 leaq -16(%rax,%rbx), %rcx
+# CHECK-NEXT: 2 2 0.25 leal -16(%rax,%rbx), %ecx
+# CHECK-NEXT: 2 2 0.25 leaq -16(%rax,%rbx), %rcx
# CHECK-NEXT: 2 2 1.00 leaw -16(%eax,%ebx,2), %cx
-# CHECK-NEXT: 4 3 0.25 leal -16(%eax,%ebx,2), %ecx
-# CHECK-NEXT: 4 3 0.25 leaq -16(%eax,%ebx,2), %rcx
+# CHECK-NEXT: 2 2 0.25 leal -16(%eax,%ebx,2), %ecx
+# CHECK-NEXT: 2 2 0.25 leaq -16(%eax,%ebx,2), %rcx
# CHECK-NEXT: 2 2 1.00 leaw -16(%rax,%rbx,2), %cx
-# CHECK-NEXT: 4 3 0.25 leal -16(%rax,%rbx,2), %ecx
-# CHECK-NEXT: 4 3 0.25 leaq -16(%rax,%rbx,2), %rcx
+# CHECK-NEXT: 2 2 0.25 leal -16(%rax,%rbx,2), %ecx
+# CHECK-NEXT: 2 2 0.25 leaq -16(%rax,%rbx,2), %rcx
# CHECK-NEXT: 2 2 1.00 leaw 1024, %cx
# CHECK-NEXT: 1 1 0.33 leal 1024, %ecx
# CHECK-NEXT: 1 1 0.33 leaq 1024, %rcx
@@ -260,29 +260,29 @@ lea 1024(%rax, %rbx, 2), %rcx
# CHECK-NEXT: 1 1 0.33 leal 1024(,%rbx), %ecx
# CHECK-NEXT: 1 1 0.33 leaq 1024(,%rbx), %rcx
# CHECK-NEXT: 2 2 1.00 leaw 1024(,%ebx,2), %cx
-# CHECK-NEXT: 4 3 0.25 leal 1024(,%ebx,2), %ecx
-# CHECK-NEXT: 4 3 0.25 leaq 1024(,%ebx,2), %rcx
+# CHECK-NEXT: 2 2 0.25 leal 1024(,%ebx,2), %ecx
+# CHECK-NEXT: 2 2 0.25 leaq 1024(,%ebx,2), %rcx
# CHECK-NEXT: 2 2 1.00 leaw 1024(,%rbx,2), %cx
-# CHECK-NEXT: 4 3 0.25 leal 1024(,%rbx,2), %ecx
-# CHECK-NEXT: 4 3 0.25 leaq 1024(,%rbx,2), %rcx
+# CHECK-NEXT: 2 2 0.25 leal 1024(,%rbx,2), %ecx
+# CHECK-NEXT: 2 2 0.25 leaq 1024(,%rbx,2), %rcx
# CHECK-NEXT: 2 2 1.00 leaw 1024(%eax,%ebx), %cx
-# CHECK-NEXT: 4 3 0.25 leal 1024(%eax,%ebx), %ecx
-# CHECK-NEXT: 4 3 0.25 leaq 1024(%eax,%ebx), %rcx
+# CHECK-NEXT: 2 2 0.25 leal 1024(%eax,%ebx), %ecx
+# CHECK-NEXT: 2 2 0.25 leaq 1024(%eax,%ebx), %rcx
# CHECK-NEXT: 2 2 1.00 leaw 1024(%rax,%rbx), %cx
-# CHECK-NEXT: 4 3 0.25 leal 1024(%rax,%rbx), %ecx
-# CHECK-NEXT: 4 3 0.25 leaq 1024(%rax,%rbx), %rcx
+# CHECK-NEXT: 2 2 0.25 leal 1024(%rax,%rbx), %ecx
+# CHECK-NEXT: 2 2 0.25 leaq 1024(%rax,%rbx), %rcx
# CHECK-NEXT: 2 2 1.00 leaw 1024(%eax,%ebx), %cx
-# CHECK-NEXT: 4 3 0.25 leal 1024(%eax,%ebx), %ecx
-# CHECK-NEXT: 4 3 0.25 leaq 1024(%eax,%ebx), %rcx
+# CHECK-NEXT: 2 2 0.25 leal 1024(%eax,%ebx), %ecx
+# CHECK-NEXT: 2 2 0.25 leaq 1024(%eax,%ebx), %rcx
# CHECK-NEXT: 2 2 1.00 leaw 1024(%rax,%rbx), %cx
-# CHECK-NEXT: 4 3 0.25 leal 1024(%rax,%rbx), %ecx
-# CHECK-NEXT: 4 3 0.25 leaq 1024(%rax,%rbx), %rcx
+# CHECK-NEXT: 2 2 0.25 leal 1024(%rax,%rbx), %ecx
+# CHECK-NEXT: 2 2 0.25 leaq 1024(%rax,%rbx), %rcx
# CHECK-NEXT: 2 2 1.00 leaw 1024(%eax,%ebx,2), %cx
-# CHECK-NEXT: 4 3 0.25 leal 1024(%eax,%ebx,2), %ecx
-# CHECK-NEXT: 4 3 0.25 leaq 1024(%eax,%ebx,2), %rcx
+# CHECK-NEXT: 2 2 0.25 leal 1024(%eax,%ebx,2), %ecx
+# CHECK-NEXT: 2 2 0.25 leaq 1024(%eax,%ebx,2), %rcx
# CHECK-NEXT: 2 2 1.00 leaw 1024(%rax,%rbx,2), %cx
-# CHECK-NEXT: 4 3 0.25 leal 1024(%rax,%rbx,2), %ecx
-# CHECK-NEXT: 4 3 0.25 leaq 1024(%rax,%rbx,2), %rcx
+# CHECK-NEXT: 2 2 0.25 leal 1024(%rax,%rbx,2), %ecx
+# CHECK-NEXT: 2 2 0.25 leaq 1024(%rax,%rbx,2), %rcx
# CHECK: Resources:
# CHECK-NEXT: [0] - Zn4AGU0
More information about the llvm-commits
mailing list