[llvm] [X86] Ensure models use vector load latency for vector loads (PR #157631)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 9 02:13:02 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
Noticed while addressing #146564: some of the znver3/4 scheduling-model overrides for vector ops were using the scalar load latency (`LoadLatency`) instead of the vector load latency (`VecLoadLatency`) by mistake.
---
Patch is 23.73 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/157631.diff
8 Files Affected:
- (modified) llvm/lib/Target/X86/X86ScheduleZnver3.td (+10-10)
- (modified) llvm/lib/Target/X86/X86ScheduleZnver4.td (+10-10)
- (modified) llvm/test/tools/llvm-mca/X86/Znver3/resources-avx1.s (+3-3)
- (modified) llvm/test/tools/llvm-mca/X86/Znver3/resources-avx2.s (+5-5)
- (modified) llvm/test/tools/llvm-mca/X86/Znver3/resources-sha.s (+5-5)
- (modified) llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s (+3-3)
- (modified) llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s (+5-5)
- (modified) llvm/test/tools/llvm-mca/X86/Znver4/resources-sha.s (+5-5)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver3.td b/llvm/lib/Target/X86/X86ScheduleZnver3.td
index 9e271c1ee3709..044b77f7aacf4 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver3.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver3.td
@@ -992,14 +992,14 @@ def Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr : SchedWriteRes<[Zn3FPFMisc0]> {
def : InstRW<[Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr], (instrs VEXTRACTF128rri, VEXTRACTI128rri)>;
def Zn3WriteVEXTRACTI128mr : SchedWriteRes<[Zn3FPFMisc0, Zn3FPSt, Zn3Store]> {
- let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
+ let Latency = !add(Znver3Model.VecLoadLatency, Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
let ReleaseAtCycles = [1, 1, 1];
let NumMicroOps = !add(Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 1);
}
def : InstRW<[Zn3WriteVEXTRACTI128mr], (instrs VEXTRACTI128mri, VEXTRACTF128mri)>;
def Zn3WriteVINSERTF128rmr : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPFMisc0]> {
- let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
+ let Latency = !add(Znver3Model.VecLoadLatency, Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
let ReleaseAtCycles = [1, 1, 1];
let NumMicroOps = !add(Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 0);
}
@@ -1221,7 +1221,7 @@ def Zn3WriteSHA1MSG1rr : SchedWriteRes<[Zn3FPU0123]> {
def : InstRW<[Zn3WriteSHA1MSG1rr], (instrs SHA1MSG1rr)>;
def Zn3WriteSHA1MSG1rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
- let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA1MSG1rr.Latency);
+ let Latency = !add(Znver3Model.VecLoadLatency, Zn3WriteSHA1MSG1rr.Latency);
let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn3WriteSHA1MSG1rr.NumMicroOps, 0);
}
@@ -1235,7 +1235,7 @@ def Zn3WriteSHA1MSG2rr_SHA1NEXTErr : SchedWriteRes<[Zn3FPU0123]> {
def : InstRW<[Zn3WriteSHA1MSG2rr_SHA1NEXTErr], (instrs SHA1MSG2rr, SHA1NEXTErr)>;
def Zn3Writerm_SHA1MSG2rm_SHA1NEXTErm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
- let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA1MSG2rr_SHA1NEXTErr.Latency);
+ let Latency = !add(Znver3Model.VecLoadLatency, Zn3WriteSHA1MSG2rr_SHA1NEXTErr.Latency);
let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn3WriteSHA1MSG2rr_SHA1NEXTErr.NumMicroOps, 0);
}
@@ -1249,7 +1249,7 @@ def Zn3WriteSHA256MSG1rr : SchedWriteRes<[Zn3FPU0123]> {
def : InstRW<[Zn3WriteSHA256MSG1rr], (instrs SHA256MSG1rr)>;
def Zn3Writerm_SHA256MSG1rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
- let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA256MSG1rr.Latency);
+ let Latency = !add(Znver3Model.VecLoadLatency, Zn3WriteSHA256MSG1rr.Latency);
let ReleaseAtCycles = [1, 1, 3];
let NumMicroOps = !add(Zn3WriteSHA256MSG1rr.NumMicroOps, 0);
}
@@ -1263,7 +1263,7 @@ def Zn3WriteSHA256MSG2rr : SchedWriteRes<[Zn3FPU0123]> {
def : InstRW<[Zn3WriteSHA256MSG2rr], (instrs SHA256MSG2rr)>;
def Zn3WriteSHA256MSG2rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
- let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA256MSG2rr.Latency);
+ let Latency = !add(Znver3Model.VecLoadLatency, Zn3WriteSHA256MSG2rr.Latency);
let ReleaseAtCycles = [1, 1, 8];
let NumMicroOps = !add(Zn3WriteSHA256MSG2rr.NumMicroOps, 1);
}
@@ -1338,14 +1338,14 @@ def Zn3WriteVPERM2I128rr_VPERM2F128rr : SchedWriteRes<[Zn3FPVShuf]> {
def : InstRW<[Zn3WriteVPERM2I128rr_VPERM2F128rr], (instrs VPERM2I128rri, VPERM2F128rri)>;
def Zn3WriteVPERM2F128rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
- let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVPERM2I128rr_VPERM2F128rr.Latency);
+ let Latency = !add(Znver3Model.VecLoadLatency, Zn3WriteVPERM2I128rr_VPERM2F128rr.Latency);
let ReleaseAtCycles = [1, 1, 1];
let NumMicroOps = !add(Zn3WriteVPERM2I128rr_VPERM2F128rr.NumMicroOps, 0);
}
def : InstRW<[Zn3WriteVPERM2F128rm], (instrs VPERM2F128rmi)>;
def Zn3WriteVPERMPSYrm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
- let Latency = !add(Znver3Model.LoadLatency, 7);
+ let Latency = !add(Znver3Model.VecLoadLatency, 7);
let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = 3;
}
@@ -1359,14 +1359,14 @@ def Zn3WriteVPERMYri : SchedWriteRes<[Zn3FPVShuf]> {
def : InstRW<[Zn3WriteVPERMYri], (instrs VPERMPDYri, VPERMQYri)>;
def Zn3WriteVPERMPDYmi : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
- let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVPERMYri.Latency);
+ let Latency = !add(Znver3Model.VecLoadLatency, Zn3WriteVPERMYri.Latency);
let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn3WriteVPERMYri.NumMicroOps, 1);
}
def : InstRW<[Zn3WriteVPERMPDYmi], (instrs VPERMPDYmi)>;
def Zn3WriteVPERMDYm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
- let Latency = !add(Znver3Model.LoadLatency, 5);
+ let Latency = !add(Znver3Model.VecLoadLatency, 5);
let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = 2;
}
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index 74d916d41f831..f4b8f8927b1b5 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -1005,14 +1005,14 @@ def Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr : SchedWriteRes<[Zn4FPFMisc0]> {
def : InstRW<[Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr], (instrs VEXTRACTF128rri, VEXTRACTI128rri)>;
def Zn4WriteVEXTRACTI128mr : SchedWriteRes<[Zn4FPFMisc0, Zn4FPSt, Zn4Store]> {
- let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
+ let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
let ReleaseAtCycles = [1, 1, 1];
let NumMicroOps = !add(Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 1);
}
def : InstRW<[Zn4WriteVEXTRACTI128mr], (instrs VEXTRACTI128mri, VEXTRACTF128mri)>;
def Zn4WriteVINSERTF128rmr : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPFMisc0]> {
- let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
+ let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
let ReleaseAtCycles = [1, 1, 1];
let NumMicroOps = !add(Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 0);
}
@@ -1262,7 +1262,7 @@ def Zn4WriteSHA1MSG1rr : SchedWriteRes<[Zn4FPU0123]> {
def : InstRW<[Zn4WriteSHA1MSG1rr], (instrs SHA1MSG1rr)>;
def Zn4WriteSHA1MSG1rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
- let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA1MSG1rr.Latency);
+ let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteSHA1MSG1rr.Latency);
let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn4WriteSHA1MSG1rr.NumMicroOps, 0);
}
@@ -1276,7 +1276,7 @@ def Zn4WriteSHA1MSG2rr_SHA1NEXTErr : SchedWriteRes<[Zn4FPU0123]> {
def : InstRW<[Zn4WriteSHA1MSG2rr_SHA1NEXTErr], (instrs SHA1MSG2rr, SHA1NEXTErr)>;
def Zn4Writerm_SHA1MSG2rm_SHA1NEXTErm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
- let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA1MSG2rr_SHA1NEXTErr.Latency);
+ let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteSHA1MSG2rr_SHA1NEXTErr.Latency);
let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn4WriteSHA1MSG2rr_SHA1NEXTErr.NumMicroOps, 0);
}
@@ -1290,7 +1290,7 @@ def Zn4WriteSHA256MSG1rr : SchedWriteRes<[Zn4FPU0123]> {
def : InstRW<[Zn4WriteSHA256MSG1rr], (instrs SHA256MSG1rr)>;
def Zn4Writerm_SHA256MSG1rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
- let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA256MSG1rr.Latency);
+ let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteSHA256MSG1rr.Latency);
let ReleaseAtCycles = [1, 1, 3];
let NumMicroOps = !add(Zn4WriteSHA256MSG1rr.NumMicroOps, 0);
}
@@ -1304,7 +1304,7 @@ def Zn4WriteSHA256MSG2rr : SchedWriteRes<[Zn4FPU0123]> {
def : InstRW<[Zn4WriteSHA256MSG2rr], (instrs SHA256MSG2rr)>;
def Zn4WriteSHA256MSG2rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
- let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA256MSG2rr.Latency);
+ let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteSHA256MSG2rr.Latency);
let ReleaseAtCycles = [1, 1, 8];
let NumMicroOps = !add(Zn4WriteSHA256MSG2rr.NumMicroOps, 1);
}
@@ -1379,7 +1379,7 @@ def Zn4WriteVPERM2I128rr_VPERM2F128rr : SchedWriteRes<[Zn4FPVShuf]> {
def : InstRW<[Zn4WriteVPERM2I128rr_VPERM2F128rr], (instrs VPERM2I128rri, VPERM2F128rri)>;
def Zn4WriteVPERM2F128rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
- let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVPERM2I128rr_VPERM2F128rr.Latency);
+ let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteVPERM2I128rr_VPERM2F128rr.Latency);
let ReleaseAtCycles = [1, 1, 1];
let NumMicroOps = !add(Zn4WriteVPERM2I128rr_VPERM2F128rr.NumMicroOps, 0);
}
@@ -1393,7 +1393,7 @@ def Zn4WriteVPERMPSYrr : SchedWriteRes<[Zn4FPVShuf]> {
def : InstRW<[Zn4WriteVPERMPSYrr], (instrs VPERMPSYrr)>;
def Zn4WriteVPERMPSYrm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
- let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVPERMPSYrr.Latency);
+ let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteVPERMPSYrr.Latency);
let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn4WriteVPERMPSYrr.NumMicroOps, 1);
}
@@ -1407,7 +1407,7 @@ def Zn4WriteVPERMYri : SchedWriteRes<[Zn4FPVShuf]> {
def : InstRW<[Zn4WriteVPERMYri], (instrs VPERMPDYri, VPERMQYri)>;
def Zn4WriteVPERMPDYmi : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
- let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVPERMYri.Latency);
+ let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteVPERMYri.Latency);
let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn4WriteVPERMYri.NumMicroOps, 1);
}
@@ -1421,7 +1421,7 @@ def Zn4WriteVPERMDYrr : SchedWriteRes<[Zn4FPVShuf]> {
def : InstRW<[Zn4WriteVPERMDYrr], (instrs VPERMDYrr)>;
def Zn4WriteVPERMYm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
- let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVPERMDYrr.Latency);
+ let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteVPERMDYrr.Latency);
let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn4WriteVPERMDYrr.NumMicroOps, 0);
}
diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx1.s
index 4f0b4843d1704..0abf8ad61a4a0 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx1.s
@@ -1193,7 +1193,7 @@ vzeroupper
# CHECK-NEXT: 7 15 4.00 vdpps $22, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 8 22 4.00 * vdpps $22, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 1.00 vextractf128 $1, %ymm0, %xmm2
-# CHECK-NEXT: 2 8 1.00 * vextractf128 $1, %ymm0, (%rax)
+# CHECK-NEXT: 2 11 1.00 * vextractf128 $1, %ymm0, (%rax)
# CHECK-NEXT: 2 1 1.00 vextractps $1, %xmm0, %ecx
# CHECK-NEXT: 2 2 1.00 * vextractps $1, %xmm0, (%rax)
# CHECK-NEXT: 4 6 2.00 vhaddpd %xmm0, %xmm1, %xmm2
@@ -1213,7 +1213,7 @@ vzeroupper
# CHECK-NEXT: 3 6 2.00 vhsubps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 4 13 2.00 * vhsubps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vinsertf128 $1, %xmm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 8 1.00 * vinsertf128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 11 1.00 * vinsertf128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vinsertps $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vinsertps $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vlddqu (%rax), %xmm2
@@ -1430,7 +1430,7 @@ vzeroupper
# CHECK-NEXT: 3 6 2.00 vpcmpistrm $1, %xmm0, %xmm2
# CHECK-NEXT: 4 13 2.00 * vpcmpistrm $1, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vperm2f128 $1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 7 1.00 * vperm2f128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 10 1.00 * vperm2f128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vpermilpd $1, %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.50 * vpermilpd $1, (%rax), %xmm2
# CHECK-NEXT: 1 3 0.50 vpermilpd %xmm0, %xmm1, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx2.s
index 1a8b9e2de1d8e..bc504285a5814 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx2.s
@@ -464,7 +464,7 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vbroadcastsd %xmm0, %ymm0
# CHECK-NEXT: 1 2 1.00 vbroadcastss %xmm0, %ymm0
# CHECK-NEXT: 1 4 1.00 vextracti128 $1, %ymm0, %xmm2
-# CHECK-NEXT: 2 8 1.00 * vextracti128 $1, %ymm0, (%rax)
+# CHECK-NEXT: 2 11 1.00 * vextracti128 $1, %ymm0, (%rax)
# CHECK-NEXT: 1 5 0.33 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 1 5 0.33 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: 1 5 0.33 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
@@ -561,13 +561,13 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vperm2i128 $1, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 9 1.00 * vperm2i128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 5 1.00 vpermd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 9 2.00 * vpermd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 12 2.00 * vpermd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 6 1.00 vpermpd $1, %ymm0, %ymm2
-# CHECK-NEXT: 3 10 2.00 * vpermpd $1, (%rax), %ymm2
+# CHECK-NEXT: 3 13 2.00 * vpermpd $1, (%rax), %ymm2
# CHECK-NEXT: 2 7 1.00 vpermps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 3 11 2.00 * vpermps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 3 14 2.00 * vpermps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 6 1.00 vpermq $1, %ymm0, %ymm2
-# CHECK-NEXT: 2 9 2.00 * vpermq $1, (%rax), %ymm2
+# CHECK-NEXT: 2 12 2.00 * vpermq $1, (%rax), %ymm2
# CHECK-NEXT: 1 5 0.33 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 1 5 0.33 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 1 5 0.33 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sha.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sha.s
index e6d5ab90a2acc..a9827788de39a 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sha.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sha.s
@@ -32,17 +32,17 @@ sha256rnds2 (%rax), %xmm2
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 2 0.50 sha1msg1 %xmm0, %xmm2
-# CHECK-NEXT: 2 6 0.50 * sha1msg1 (%rax), %xmm2
+# CHECK-NEXT: 2 9 0.50 * sha1msg1 (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 sha1msg2 %xmm0, %xmm2
-# CHECK-NEXT: 1 5 0.50 * sha1msg2 (%rax), %xmm2
+# CHECK-NEXT: 1 8 0.50 * sha1msg2 (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 sha1nexte %xmm0, %xmm2
-# CHECK-NEXT: 1 5 0.50 * sha1nexte (%rax), %xmm2
+# CHECK-NEXT: 1 8 0.50 * sha1nexte (%rax), %xmm2
# CHECK-NEXT: 1 6 2.00 sha1rnds4 $3, %xmm0, %xmm2
# CHECK-NEXT: 1 10 0.50 * sha1rnds4 $3, (%rax), %xmm2
# CHECK-NEXT: 2 2 0.75 sha256msg1 %xmm0, %xmm2
-# CHECK-NEXT: 2 6 0.75 * sha256msg1 (%rax), %xmm2
+# CHECK-NEXT: 2 9 0.75 * sha256msg1 (%rax), %xmm2
# CHECK-NEXT: 4 3 2.00 sha256msg2 %xmm0, %xmm2
-# CHECK-NEXT: 5 7 2.00 * sha256msg2 (%rax), %xmm2
+# CHECK-NEXT: 5 10 2.00 * sha256msg2 (%rax), %xmm2
# CHECK-NEXT: 1 4 2.00 sha256rnds2 %xmm0, %xmm0, %xmm2
# CHECK-NEXT: 1 10 0.50 * sha256rnds2 %xmm0, (%rax), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s
index 1b2735a9cdde8..9b721c933ab51 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s
@@ -1193,7 +1193,7 @@ vzeroupper
# CHECK-NEXT: 7 11 4.00 vdpps $22, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 8 18 4.00 * vdpps $22, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 1.00 vextractf128 $1, %ymm0, %xmm2
-# CHECK-NEXT: 2 8 1.00 * vextractf128 $1, %ymm0, (%rax)
+# CHECK-NEXT: 2 11 1.00 * vextractf128 $1, %ymm0, (%rax)
# CHECK-NEXT: 2 1 1.00 vextractps $1, %xmm0, %ecx
# CHECK-NEXT: 2 2 1.00 * vextractps $1, %xmm0, (%rax)
# CHECK-NEXT: 3 4 2.00 vhaddpd %xmm0, %xmm1, %xmm2
@@ -1213,7 +1213,7 @@ vzeroupper
# CHECK-NEXT: 3 4 2.00 vhsubps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 4 11 2.00 * vhsubps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vinsertf128 $1, %xmm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 8 1.00 * vinsertf128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 11 1.00 * vinsertf128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vinsertps $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vinsertps $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vlddqu (%rax), %xmm2
@@ -1430,7 +1430,7 @@ vzeroupper
# CHECK-NEXT: 3 6 2.00 vpcmpistrm $1, %xmm0, %xmm2
# CHECK-NEXT: 4 13 2.00 * vpcmpistrm $1, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vperm2f128 $1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 7 1.00 * vperm2f128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 10 1.00 * vperm2f128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vpermilpd $1, %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.50 * vpermilpd $1, (%rax), %xmm2
# CHECK-NEXT: 1 3 0.50 vpermilpd %xmm0, %xmm1, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s
index 0ad14688d0b7f..25e367c96e44b 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s
@@ -464,7 +464,7 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vbroadcastsd %xmm0, %ymm0
# CHECK-NEXT: 1 2 1.00 vbroadcastss %xmm0, %ymm0
# CHECK-NEXT: 1 4 1.00 vextracti128 $1, %ymm0, %xmm2
-# CHECK-NEXT: 2 8 1.00 * vextracti128 $1, %ymm0, (%rax)
+# CHECK-NEXT: 2 11 1.00 * ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/157631
More information about the llvm-commits mailing list