[llvm] [AArch64] Change IssueWidth to 5 in AArch64SchedNeoverseN2.td (PR #145717)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 25 08:07:37 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Simon Wallis (simonwallis2)
<details>
<summary>Changes</summary>
It has been observed that the issue width for Neoverse-N2 CPUs is set too high and does not properly reflect the dispatch constraints.
I tested several values of IssueWidth (10, 8, 6, 5, 4) by running a range of workloads on a Neoverse-N2 machine, and an issue width of 5 gave the highest overall geomean score.
If this patch were to cause any major regression post-commit, it could be easily reverted, but it is likely to show an overall improvement.
Related Neoverse-V2 PR: https://github.com/llvm/llvm-project/pull/142565
Change-Id: Icdbc0aef5ea004439da6abffaaae3151f91c1b5c
---
Patch is 324.01 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/145717.diff
4 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td (+1-1)
- (modified) llvm/test/CodeGen/AArch64/machine-combiner.ll (+4-4)
- (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s (+9-9)
- (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-writeback.s (+2009-2005)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
index e23daec97bd2d..91a707910a7f3 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
@@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//
def NeoverseN2Model : SchedMachineModel {
- let IssueWidth = 10; // Micro-ops dispatched at a time.
+ let IssueWidth = 5; // Micro-ops dispatched at a time.
let MicroOpBufferSize = 160; // Entries in micro-op re-order buffer.
let LoadLatency = 4; // Optimistic load latency.
let MispredictPenalty = 10; // Extra cycles for mispredicted branch.
diff --git a/llvm/test/CodeGen/AArch64/machine-combiner.ll b/llvm/test/CodeGen/AArch64/machine-combiner.ll
index c8df283aace0b..70a638857ce4a 100644
--- a/llvm/test/CodeGen/AArch64/machine-combiner.ll
+++ b/llvm/test/CodeGen/AArch64/machine-combiner.ll
@@ -262,8 +262,8 @@ define half @reassociate_adds_half(half %x0, half %x1, half %x2, half %x3) {
; CHECK-UNSAFE-LABEL: reassociate_adds_half:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1
-; CHECK-UNSAFE-NEXT: fadd h2, h3, h2
-; CHECK-UNSAFE-NEXT: fadd h0, h2, h0
+; CHECK-UNSAFE-NEXT: fadd h1, h3, h2
+; CHECK-UNSAFE-NEXT: fadd h0, h1, h0
; CHECK-UNSAFE-NEXT: ret
%t0 = fdiv half %x0, %x1
%t1 = fadd half %x2, %t0
@@ -284,8 +284,8 @@ define half @reassociate_muls_half(half %x0, half %x1, half %x2, half %x3) {
; CHECK-UNSAFE-LABEL: reassociate_muls_half:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1
-; CHECK-UNSAFE-NEXT: fmul h2, h3, h2
-; CHECK-UNSAFE-NEXT: fmul h0, h2, h0
+; CHECK-UNSAFE-NEXT: fmul h1, h3, h2
+; CHECK-UNSAFE-NEXT: fmul h0, h1, h0
; CHECK-UNSAFE-NEXT: ret
%t0 = fdiv half %x0, %x1
%t1 = fmul half %x2, %t0
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
index 99e39567b1ad6..ef9d4463ebe52 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
@@ -5066,19 +5066,19 @@ zip2 z31.s, z31.s, z31.s
# CHECK-NEXT: 2 2 1.00 movs p0.b, p0/z, p0.b
# CHECK-NEXT: 2 2 1.00 movs p15.b, p15.b
# CHECK-NEXT: 2 2 1.00 movs p15.b, p15/z, p15.b
-# CHECK-NEXT: 1 1 0.10 U mrs x3, ID_AA64ZFR0_EL1
-# CHECK-NEXT: 1 1 0.10 U mrs x3, ZCR_EL1
-# CHECK-NEXT: 1 1 0.10 U mrs x3, ZCR_EL12
-# CHECK-NEXT: 1 1 0.10 U mrs x3, ZCR_EL2
-# CHECK-NEXT: 1 1 0.10 U mrs x3, ZCR_EL3
+# CHECK-NEXT: 1 1 0.20 U mrs x3, ID_AA64ZFR0_EL1
+# CHECK-NEXT: 1 1 0.20 U mrs x3, ZCR_EL1
+# CHECK-NEXT: 1 1 0.20 U mrs x3, ZCR_EL12
+# CHECK-NEXT: 1 1 0.20 U mrs x3, ZCR_EL2
+# CHECK-NEXT: 1 1 0.20 U mrs x3, ZCR_EL3
# CHECK-NEXT: 1 4 1.00 msb z0.b, p7/m, z1.b, z31.b
# CHECK-NEXT: 2 5 2.00 msb z0.d, p7/m, z1.d, z31.d
# CHECK-NEXT: 1 4 1.00 msb z0.h, p7/m, z1.h, z31.h
# CHECK-NEXT: 1 4 1.00 msb z0.s, p7/m, z1.s, z31.s
-# CHECK-NEXT: 1 1 0.10 U msr ZCR_EL1, x3
-# CHECK-NEXT: 1 1 0.10 U msr ZCR_EL12, x3
-# CHECK-NEXT: 1 1 0.10 U msr ZCR_EL2, x3
-# CHECK-NEXT: 1 1 0.10 U msr ZCR_EL3, x3
+# CHECK-NEXT: 1 1 0.20 U msr ZCR_EL1, x3
+# CHECK-NEXT: 1 1 0.20 U msr ZCR_EL12, x3
+# CHECK-NEXT: 1 1 0.20 U msr ZCR_EL2, x3
+# CHECK-NEXT: 1 1 0.20 U msr ZCR_EL3, x3
# CHECK-NEXT: 1 4 1.00 mul z0.b, p7/m, z0.b, z31.b
# CHECK-NEXT: 1 4 1.00 mul z0.b, z1.b, z2.b
# CHECK-NEXT: 2 5 2.00 mul z0.d, p7/m, z0.d, z31.d
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-writeback.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-writeback.s
index 5ffaf9138d482..dee46a304582b 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-writeback.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-writeback.s
@@ -1185,10 +1185,10 @@ add x0, x27, 1
# CHECK-NEXT: Total Cycles: 508
# CHECK-NEXT: Total uOps: 1500
-# CHECK: Dispatch Width: 10
+# CHECK: Dispatch Width: 5
# CHECK-NEXT: uOps Per Cycle: 2.95
# CHECK-NEXT: IPC: 1.97
-# CHECK-NEXT: Block RThroughput: 2.5
+# CHECK-NEXT: Block RThroughput: 3.0
# CHECK: Timeline view:
# CHECK-NEXT: 012
@@ -1197,13 +1197,13 @@ add x0, x27, 1
# CHECK: [0,0] DeeeeeeER . . ld1 { v1.1d }, [x27], #8
# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1
# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.2d }, [x27], #16
-# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1
-# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.2s }, [x27], #8
-# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.4h }, [x27], #8
-# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1
-# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.4s }, [x27], #16
-# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1
+# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=eeeeeeER . ld1 { v1.2s }, [x27], #8
+# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=eeeeeeER. ld1 { v1.4h }, [x27], #8
+# CHECK-NEXT: [0,7] . D==eE----R. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==eeeeeeER ld1 { v1.4s }, [x27], #16
+# CHECK-NEXT: [0,9] . D==eE----R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1215,14 +1215,14 @@ add x0, x27, 1
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d }, [x27], #8
# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.2d }, [x27], #16
-# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.2s }, [x27], #8
-# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.4h }, [x27], #8
-# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.4s }, [x27], #16
-# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.1 0.1 2.0 <total>
+# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.2s }, [x27], #8
+# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld1 { v1.4h }, [x27], #8
+# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1 { v1.4s }, [x27], #16
+# CHECK-NEXT: 9. 1 3.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 1 2.3 0.1 2.0 <total>
# CHECK: [1] Code Region - G02
@@ -1231,10 +1231,10 @@ add x0, x27, 1
# CHECK-NEXT: Total Cycles: 508
# CHECK-NEXT: Total uOps: 1500
-# CHECK: Dispatch Width: 10
+# CHECK: Dispatch Width: 5
# CHECK-NEXT: uOps Per Cycle: 2.95
# CHECK-NEXT: IPC: 1.97
-# CHECK-NEXT: Block RThroughput: 2.5
+# CHECK-NEXT: Block RThroughput: 3.0
# CHECK: Timeline view:
# CHECK-NEXT: 012
@@ -1243,13 +1243,13 @@ add x0, x27, 1
# CHECK: [0,0] DeeeeeeER . . ld1 { v1.8b }, [x27], #8
# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1
# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.8h }, [x27], #16
-# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1
-# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.16b }, [x27], #16
-# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.1d }, [x27], x28
-# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1
-# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.2d }, [x27], x28
-# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1
+# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=eeeeeeER . ld1 { v1.16b }, [x27], #16
+# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=eeeeeeER. ld1 { v1.1d }, [x27], x28
+# CHECK-NEXT: [0,7] . D==eE----R. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==eeeeeeER ld1 { v1.2d }, [x27], x28
+# CHECK-NEXT: [0,9] . D==eE----R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1261,14 +1261,14 @@ add x0, x27, 1
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b }, [x27], #8
# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8h }, [x27], #16
-# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.16b }, [x27], #16
-# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.1d }, [x27], x28
-# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.2d }, [x27], x28
-# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.1 0.1 2.0 <total>
+# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.16b }, [x27], #16
+# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld1 { v1.1d }, [x27], x28
+# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1 { v1.2d }, [x27], x28
+# CHECK-NEXT: 9. 1 3.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 1 2.3 0.1 2.0 <total>
# CHECK: [2] Code Region - G03
@@ -1277,10 +1277,10 @@ add x0, x27, 1
# CHECK-NEXT: Total Cycles: 508
# CHECK-NEXT: Total uOps: 1500
-# CHECK: Dispatch Width: 10
+# CHECK: Dispatch Width: 5
# CHECK-NEXT: uOps Per Cycle: 2.95
# CHECK-NEXT: IPC: 1.97
-# CHECK-NEXT: Block RThroughput: 2.5
+# CHECK-NEXT: Block RThroughput: 3.0
# CHECK: Timeline view:
# CHECK-NEXT: 012
@@ -1289,13 +1289,13 @@ add x0, x27, 1
# CHECK: [0,0] DeeeeeeER . . ld1 { v1.2s }, [x27], x28
# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1
# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.4h }, [x27], x28
-# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1
-# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.4s }, [x27], x28
-# CHECK-NEXT: [0,5] D===eE----R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.8b }, [x27], x28
-# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1
-# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.8h }, [x27], x28
-# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1
+# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1
+# CHECK-NEXT: [0,4] .D=eeeeeeER . ld1 { v1.4s }, [x27], x28
+# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1
+# CHECK-NEXT: [0,6] . D=eeeeeeER. ld1 { v1.8b }, [x27], x28
+# CHECK-NEXT: [0,7] . D==eE----R. add x0, x27, #1
+# CHECK-NEXT: [0,8] . D==eeeeeeER ld1 { v1.8h }, [x27], x28
+# CHECK-NEXT: [0,9] . D==eE----R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1307,14 +1307,14 @@ add x0, x27, 1
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s }, [x27], x28
# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1
# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.4h }, [x27], x28
-# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.4s }, [x27], x28
-# CHECK-NEXT: 5. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.8b }, [x27], x28
-# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.8h }, [x27], x28
-# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.1 0.1 2.0 <total>
+# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.4s }, [x27], x28
+# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 2.0 0.0 0.0 ld1 { v1.8b }, [x27], x28
+# CHECK-NEXT: 7. 1 3.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1 { v1.8h }, [x27], x28
+# CHECK-NEXT: 9. 1 3.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 1 2.3 0.1 2.0 <total>
# CHECK: [3] Code Region - G04
@@ -1323,10 +1323,10 @@ add x0, x27, 1
# CHECK-NEXT: Total Cycles: 508
# CHECK-NEXT: Total uOps: 1900
-# CHECK: Dispatch Width: 10
+# CHECK: Dispatch Width: 5
# CHECK-NEXT: uOps Per Cycle: 3.74
# CHECK-NEXT: IPC: 1.97
-# CHECK-NEXT: Block RThroughput: 3.0
+# CHECK-NEXT: Block RThroughput: 3.8
# CHECK: Timeline view:
# CHECK-NEXT: 012
@@ -1334,14 +1334,14 @@ add x0, x27, 1
# CHECK: [0,0] DeeeeeeER . . ld1 { v1.16b }, [x27], x28
# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1
-# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.1d, v2.1d }, [x27], #16
-# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1
-# CHECK-NEXT: [0,4] D==eeeeeeER . ld1 { v1.2d, v2.2d }, [x27], #32
-# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.2s, v2.2s }, [x27], #16
-# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1
-# CHECK-NEXT: [0,8] .D===eeeeeeER ld1 { v1.4h, v2.4h }, [x27], #16
-# CHECK-NEXT: [0,9] .D====eE----R add x0, x27, #1
+# CHECK-NEXT: [0,2] .DeeeeeeER. . ld1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DeeeeeeER . ld1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: [0,5] . D=eE----R . add x0, x27, #1
+# CHECK-NEXT: [0,6] . DeeeeeeER. ld1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: [0,7] . D=eE----R. add x0, x27, #1
+# CHECK-NEXT: [0,8] . DeeeeeeER ld1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: [0,9] . D=eE----R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1352,15 +1352,15 @@ add x0, x27, 1
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b }, [x27], x28
# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], #16
-# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32
-# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16
-# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 4.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16
-# CHECK-NEXT: 9. 1 5.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 1 3.0 0.1 2.0 <total>
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], #16
+# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32
+# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16
+# CHECK-NEXT: 7. 1 2.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16
+# CHECK-NEXT: 9. 1 2.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 1 1.5 0.1 2.0 <total>
# CHECK: [4] Code Region - G05
@@ -1369,10 +1369,10 @@ add x0, x27, 1
# CHECK-NEXT: Total Cycles: 508
# CHECK-NEXT: Total uOps: 2000
-# CHECK: Dispatch Width: 10
+# CHECK: Dispatch Width: 5
# CHECK-NEXT: uOps Per Cycle: 3.94
# CHECK-NEXT: IPC: 1.97
-# CHECK-NEXT: Block RThroughput: 3.3
+# CHECK-NEXT: Block RThroughput: 4.0
# CHECK: Timeline view:
# CHECK-NEXT: 012
@@ -1380,14 +1380,14 @@ add x0, x27, 1
# CHECK: [0,0] DeeeeeeER . . ld1 { v1.4s, v2.4s }, [x27], #32
# CHECK-NEXT: [0,1] D=eE----R . . add x0, x27, #1
-# CHECK-NEXT: [0,2] D=eeeeeeER. . ld1 { v1.8b, v2.8b }, [x27], #16
-# CHECK-NEXT: [0,3] D==eE----R. . add x0, x27, #1
-# CHECK-NEXT: [0,4] .D=eeeeeeER . ld1 { v1.8h, v2.8h }, [x27], #32
-# CHECK-NEXT: [0,5] .D==eE----R . add x0, x27, #1
-# CHECK-NEXT: [0,6] .D==eeeeeeER. ld1 { v1.16b, v2.16b }, [x27], #32
-# CHECK-NEXT: [0,7] .D===eE----R. add x0, x27, #1
-# CHECK-NEXT: [0,8] . D==eeeeeeER ld1 { v1.1d, v2.1d }, [x27], x28
-# CHECK-NEXT: [0,9] . D===eE----R add x0, x27, #1
+# CHECK-NEXT: [0,2] .DeeeeeeER. . ld1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: [0,3] .D=eE----R. . add x0, x27, #1
+# CHECK-NEXT: [0,4] . DeeeeeeER . ld1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: [0,5] . D=eE----R . add x0, x27, #1
+# CHECK-NEXT: [0,6] . DeeeeeeER. ld1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: [0,7] . D=eE----R. add x0, x27, #1
+# CHECK-NEXT: [0,8] . DeeeeeeER ld1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: [0,9] . D=eE----R add x0, x27, #1
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -1398,15 +1398,15 @@ add x0, x27, 1
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s }, [x27], #32
# CHECK-NEXT: 1. 1 2.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], #16
-# CHECK-NEXT: 3. 1 3.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32
-# CHECK-NEXT: 5. 1 3.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 6. 1 3.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32
-# CHECK-NEXT: 7. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 8. 1 3.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28
-# CHECK-NEXT: 9. 1 4.0 0.0 4.0 add x0, x27, #1
-# CHECK-NEXT: 1 2.7 0.1 2.0 <total>
+# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], #16
+# CHECK-NEXT: 3. 1 2.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 4. 1 1.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32
+# CHECK-NEXT: 5. 1 2.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 6. 1 1.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32
+# CHECK-NEXT: 7. 1 2.0 0.0 4.0 add x0, x27, #1
+# CHECK-NEXT: 8. 1 1.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28
+# CHECK-NEXT: 9. 1 2.0 0.0 4.0 ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/145717
More information about the llvm-commits mailing list