[llvm] fe13b81 - [X86][NFC] Pre-commit llvm-mca tests for PR51495.

Andrea Di Biagio via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 25 06:18:04 PDT 2021


Author: Andrea Di Biagio
Date: 2021-08-25T14:17:17+01:00
New Revision: fe13b81ed970469478744bbe1fc9d6e2f1bb6fa9

URL: https://github.com/llvm/llvm-project/commit/fe13b81ed970469478744bbe1fc9d6e2f1bb6fa9
DIFF: https://github.com/llvm/llvm-project/commit/fe13b81ed970469478744bbe1fc9d6e2f1bb6fa9.diff

LOG: [X86][NFC] Pre-commit llvm-mca tests for PR51495.

WriteIMulH reports an incorrect latency for RM variants of MULX.

Added: 
    llvm/test/tools/llvm-mca/X86/Haswell/mulx-hi-read-advance.s
    llvm/test/tools/llvm-mca/X86/SkylakeClient/mulx-hi-read-advance.s
    llvm/test/tools/llvm-mca/X86/Znver2/mulx-hi-read-advance.s
    llvm/test/tools/llvm-mca/X86/Znver3/mulx-hi-read-advance.s

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/tools/llvm-mca/X86/Haswell/mulx-hi-read-advance.s b/llvm/test/tools/llvm-mca/X86/Haswell/mulx-hi-read-advance.s
new file mode 100644
index 000000000000..43ba92424bc0
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/mulx-hi-read-advance.s
@@ -0,0 +1,140 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -timeline -iterations=1 < %s | FileCheck %s
+
+# PR51495: WriteIMulH reports an incorrect latency for the RM variants of MULX.
+
+# LLVM-MCA-BEGIN
+mulxl (%rdi), %eax, %ecx
+add %eax, %eax
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+mulxq (%rdi), %rax, %rcx
+add %rax, %rax
+# LLVM-MCA-END
+
+# CHECK:      [0] Code Region
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      2
+# CHECK-NEXT: Total Cycles:      12
+# CHECK-NEXT: Total uOps:        6
+
+# CHECK:      Dispatch Width:    4
+# CHECK-NEXT: uOps Per Cycle:    0.50
+# CHECK-NEXT: IPC:               0.17
+# CHECK-NEXT: Block RThroughput: 1.5
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  5      9     1.00    *                   mulxl	(%rdi), %eax, %ecx
+# CHECK-NEXT:  1      1     0.25                        addl	%eax, %eax
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - HWDivider
+# CHECK-NEXT: [1]   - HWFPDivider
+# CHECK-NEXT: [2]   - HWPort0
+# CHECK-NEXT: [3]   - HWPort1
+# CHECK-NEXT: [4]   - HWPort2
+# CHECK-NEXT: [5]   - HWPort3
+# CHECK-NEXT: [6]   - HWPort4
+# CHECK-NEXT: [7]   - HWPort5
+# CHECK-NEXT: [8]   - HWPort6
+# CHECK-NEXT: [9]   - HWPort7
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
+# CHECK-NEXT:  -      -     1.00   1.00    -     1.00    -     1.00   1.00    -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -     1.00   1.00    -     mulxl	(%rdi), %eax, %ecx
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     addl	%eax, %eax
+
+# CHECK:      Timeline view:
+# CHECK-NEXT:                     01
+# CHECK-NEXT: Index     0123456789
+
+# CHECK:      [0,0]     DeeeeeeeeeER   mulxl	(%rdi), %eax, %ecx
+# CHECK-NEXT: [0,1]     .D==eE-----R   addl	%eax, %eax
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       mulxl	(%rdi), %eax, %ecx
+# CHECK-NEXT: 1.     1     3.0    0.0    5.0       addl	%eax, %eax
+# CHECK-NEXT:        1     2.0    0.5    2.5       <total>
+
+# CHECK:      [1] Code Region
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      2
+# CHECK-NEXT: Total Cycles:      12
+# CHECK-NEXT: Total uOps:        5
+
+# CHECK:      Dispatch Width:    4
+# CHECK-NEXT: uOps Per Cycle:    0.42
+# CHECK-NEXT: IPC:               0.17
+# CHECK-NEXT: Block RThroughput: 1.3
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  4      9     1.00    *                   mulxq	(%rdi), %rax, %rcx
+# CHECK-NEXT:  1      1     0.25                        addq	%rax, %rax
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - HWDivider
+# CHECK-NEXT: [1]   - HWFPDivider
+# CHECK-NEXT: [2]   - HWPort0
+# CHECK-NEXT: [3]   - HWPort1
+# CHECK-NEXT: [4]   - HWPort2
+# CHECK-NEXT: [5]   - HWPort3
+# CHECK-NEXT: [6]   - HWPort4
+# CHECK-NEXT: [7]   - HWPort5
+# CHECK-NEXT: [8]   - HWPort6
+# CHECK-NEXT: [9]   - HWPort7
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -     1.00   1.00    -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -     1.00    -     mulxq	(%rdi), %rax, %rcx
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     addq	%rax, %rax
+
+# CHECK:      Timeline view:
+# CHECK-NEXT:                     01
+# CHECK-NEXT: Index     0123456789
+
+# CHECK:      [0,0]     DeeeeeeeeeER   mulxq	(%rdi), %rax, %rcx
+# CHECK-NEXT: [0,1]     .D==eE-----R   addq	%rax, %rax
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       mulxq	(%rdi), %rax, %rcx
+# CHECK-NEXT: 1.     1     3.0    0.0    5.0       addq	%rax, %rax
+# CHECK-NEXT:        1     2.0    0.5    2.5       <total>

diff  --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/mulx-hi-read-advance.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/mulx-hi-read-advance.s
new file mode 100644
index 000000000000..5d98fcdfbf7d
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/mulx-hi-read-advance.s
@@ -0,0 +1,140 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -timeline -iterations=1 < %s | FileCheck %s
+
+# PR51495: WriteIMulH reports an incorrect latency for the RM variants of MULX.
+
+# LLVM-MCA-BEGIN
+mulxl (%rdi), %eax, %ecx
+add %eax, %eax
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+mulxq (%rdi), %rax, %rcx
+add %rax, %rax
+# LLVM-MCA-END
+
+# CHECK:      [0] Code Region
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      2
+# CHECK-NEXT: Total Cycles:      12
+# CHECK-NEXT: Total uOps:        6
+
+# CHECK:      Dispatch Width:    6
+# CHECK-NEXT: uOps Per Cycle:    0.50
+# CHECK-NEXT: IPC:               0.17
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  5      9     1.00    *                   mulxl	(%rdi), %eax, %ecx
+# CHECK-NEXT:  1      1     0.25                        addl	%eax, %eax
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - SKLDivider
+# CHECK-NEXT: [1]   - SKLFPDivider
+# CHECK-NEXT: [2]   - SKLPort0
+# CHECK-NEXT: [3]   - SKLPort1
+# CHECK-NEXT: [4]   - SKLPort2
+# CHECK-NEXT: [5]   - SKLPort3
+# CHECK-NEXT: [6]   - SKLPort4
+# CHECK-NEXT: [7]   - SKLPort5
+# CHECK-NEXT: [8]   - SKLPort6
+# CHECK-NEXT: [9]   - SKLPort7
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
+# CHECK-NEXT:  -      -     1.00   1.00    -     1.00    -     1.00   1.00    -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -     1.00   1.00    -     mulxl	(%rdi), %eax, %ecx
+# CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     addl	%eax, %eax
+
+# CHECK:      Timeline view:
+# CHECK-NEXT:                     01
+# CHECK-NEXT: Index     0123456789
+
+# CHECK:      [0,0]     DeeeeeeeeeER   mulxl	(%rdi), %eax, %ecx
+# CHECK-NEXT: [0,1]     D===eE-----R   addl	%eax, %eax
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       mulxl	(%rdi), %eax, %ecx
+# CHECK-NEXT: 1.     1     4.0    0.0    5.0       addl	%eax, %eax
+# CHECK-NEXT:        1     2.5    0.5    2.5       <total>
+
+# CHECK:      [1] Code Region
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      2
+# CHECK-NEXT: Total Cycles:      12
+# CHECK-NEXT: Total uOps:        5
+
+# CHECK:      Dispatch Width:    6
+# CHECK-NEXT: uOps Per Cycle:    0.42
+# CHECK-NEXT: IPC:               0.17
+# CHECK-NEXT: Block RThroughput: 1.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  4      9     1.00    *                   mulxq	(%rdi), %rax, %rcx
+# CHECK-NEXT:  1      1     0.25                        addq	%rax, %rax
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - SKLDivider
+# CHECK-NEXT: [1]   - SKLFPDivider
+# CHECK-NEXT: [2]   - SKLPort0
+# CHECK-NEXT: [3]   - SKLPort1
+# CHECK-NEXT: [4]   - SKLPort2
+# CHECK-NEXT: [5]   - SKLPort3
+# CHECK-NEXT: [6]   - SKLPort4
+# CHECK-NEXT: [7]   - SKLPort5
+# CHECK-NEXT: [8]   - SKLPort6
+# CHECK-NEXT: [9]   - SKLPort7
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -     1.00   1.00    -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -     1.00    -      -     mulxq	(%rdi), %rax, %rcx
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00    -     addq	%rax, %rax
+
+# CHECK:      Timeline view:
+# CHECK-NEXT:                     01
+# CHECK-NEXT: Index     0123456789
+
+# CHECK:      [0,0]     DeeeeeeeeeER   mulxq	(%rdi), %rax, %rcx
+# CHECK-NEXT: [0,1]     D===eE-----R   addq	%rax, %rax
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       mulxq	(%rdi), %rax, %rcx
+# CHECK-NEXT: 1.     1     4.0    0.0    5.0       addq	%rax, %rax
+# CHECK-NEXT:        1     2.5    0.5    2.5       <total>

diff  --git a/llvm/test/tools/llvm-mca/X86/Znver2/mulx-hi-read-advance.s b/llvm/test/tools/llvm-mca/X86/Znver2/mulx-hi-read-advance.s
new file mode 100644
index 000000000000..2cc0689ff24a
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/X86/Znver2/mulx-hi-read-advance.s
@@ -0,0 +1,144 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -timeline -iterations=1 < %s | FileCheck %s
+
+# PR51495: WriteIMulH reports an incorrect latency for the RM variants of MULX.
+
+# LLVM-MCA-BEGIN
+mulxl (%rdi), %eax, %ecx
+add %eax, %eax
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+mulxq (%rdi), %rax, %rcx
+add %rax, %rax
+# LLVM-MCA-END
+
+# CHECK:      [0] Code Region
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      2
+# CHECK-NEXT: Total Cycles:      10
+# CHECK-NEXT: Total uOps:        2
+
+# CHECK:      Dispatch Width:    4
+# CHECK-NEXT: uOps Per Cycle:    0.20
+# CHECK-NEXT: IPC:               0.20
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      7     2.00    *                   mulxl	(%rdi), %eax, %ecx
+# CHECK-NEXT:  1      1     0.25                        addl	%eax, %eax
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - Zn2AGU0
+# CHECK-NEXT: [1]   - Zn2AGU1
+# CHECK-NEXT: [2]   - Zn2AGU2
+# CHECK-NEXT: [3]   - Zn2ALU0
+# CHECK-NEXT: [4]   - Zn2ALU1
+# CHECK-NEXT: [5]   - Zn2ALU2
+# CHECK-NEXT: [6]   - Zn2ALU3
+# CHECK-NEXT: [7]   - Zn2Divider
+# CHECK-NEXT: [8]   - Zn2FPU0
+# CHECK-NEXT: [9]   - Zn2FPU1
+# CHECK-NEXT: [10]  - Zn2FPU2
+# CHECK-NEXT: [11]  - Zn2FPU3
+# CHECK-NEXT: [12]  - Zn2Multiplier
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]
+# CHECK-NEXT:  -      -     1.00    -     1.00    -     1.00    -      -      -      -      -     2.00
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   Instructions:
+# CHECK-NEXT:  -      -     1.00    -     1.00    -      -      -      -      -      -      -     2.00   mulxl	(%rdi), %eax, %ecx
+# CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -      -      -      -      -     addl	%eax, %eax
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     0123456789
+
+# CHECK:      [0,0]     DeeeeeeeER   mulxl	(%rdi), %eax, %ecx
+# CHECK-NEXT: [0,1]     D===eE---R   addl	%eax, %eax
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       mulxl	(%rdi), %eax, %ecx
+# CHECK-NEXT: 1.     1     4.0    0.0    3.0       addl	%eax, %eax
+# CHECK-NEXT:        1     2.5    0.5    1.5       <total>
+
+# CHECK:      [1] Code Region
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      2
+# CHECK-NEXT: Total Cycles:      10
+# CHECK-NEXT: Total uOps:        2
+
+# CHECK:      Dispatch Width:    4
+# CHECK-NEXT: uOps Per Cycle:    0.20
+# CHECK-NEXT: IPC:               0.20
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      7     2.00    *                   mulxq	(%rdi), %rax, %rcx
+# CHECK-NEXT:  1      1     0.25                        addq	%rax, %rax
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - Zn2AGU0
+# CHECK-NEXT: [1]   - Zn2AGU1
+# CHECK-NEXT: [2]   - Zn2AGU2
+# CHECK-NEXT: [3]   - Zn2ALU0
+# CHECK-NEXT: [4]   - Zn2ALU1
+# CHECK-NEXT: [5]   - Zn2ALU2
+# CHECK-NEXT: [6]   - Zn2ALU3
+# CHECK-NEXT: [7]   - Zn2Divider
+# CHECK-NEXT: [8]   - Zn2FPU0
+# CHECK-NEXT: [9]   - Zn2FPU1
+# CHECK-NEXT: [10]  - Zn2FPU2
+# CHECK-NEXT: [11]  - Zn2FPU3
+# CHECK-NEXT: [12]  - Zn2Multiplier
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]
+# CHECK-NEXT:  -      -     1.00    -     1.00    -     1.00    -      -      -      -      -     2.00
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   Instructions:
+# CHECK-NEXT:  -      -     1.00    -     1.00    -      -      -      -      -      -      -     2.00   mulxq	(%rdi), %rax, %rcx
+# CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -      -      -      -      -     addq	%rax, %rax
+
+# CHECK:      Timeline view:
+# CHECK-NEXT: Index     0123456789
+
+# CHECK:      [0,0]     DeeeeeeeER   mulxq	(%rdi), %rax, %rcx
+# CHECK-NEXT: [0,1]     D===eE---R   addq	%rax, %rax
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       mulxq	(%rdi), %rax, %rcx
+# CHECK-NEXT: 1.     1     4.0    0.0    3.0       addq	%rax, %rax
+# CHECK-NEXT:        1     2.5    0.5    1.5       <total>

diff  --git a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-hi-read-advance.s b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-hi-read-advance.s
new file mode 100644
index 000000000000..e1a44a5ade44
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-hi-read-advance.s
@@ -0,0 +1,166 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver3 -timeline -iterations=1 < %s | FileCheck %s
+
+# PR51495: WriteIMulH reports an incorrect latency for the RM variants of MULX.
+
+# LLVM-MCA-BEGIN
+mulxl (%rdi), %eax, %ecx
+add %eax, %eax
+# LLVM-MCA-END
+
+# LLVM-MCA-BEGIN
+mulxq (%rdi), %rax, %rcx
+add %rax, %rax
+# LLVM-MCA-END
+
+# CHECK:      [0] Code Region
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      2
+# CHECK-NEXT: Total Cycles:      11
+# CHECK-NEXT: Total uOps:        3
+
+# CHECK:      Dispatch Width:    6
+# CHECK-NEXT: uOps Per Cycle:    0.27
+# CHECK-NEXT: IPC:               0.18
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  2      8     2.00    *                   mulxl	(%rdi), %eax, %ecx
+# CHECK-NEXT:  1      1     0.25                        addl	%eax, %eax
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - Zn3AGU0
+# CHECK-NEXT: [1]   - Zn3AGU1
+# CHECK-NEXT: [2]   - Zn3AGU2
+# CHECK-NEXT: [3]   - Zn3ALU0
+# CHECK-NEXT: [4]   - Zn3ALU1
+# CHECK-NEXT: [5]   - Zn3ALU2
+# CHECK-NEXT: [6]   - Zn3ALU3
+# CHECK-NEXT: [7]   - Zn3BRU1
+# CHECK-NEXT: [8]   - Zn3FPP0
+# CHECK-NEXT: [9]   - Zn3FPP1
+# CHECK-NEXT: [10]  - Zn3FPP2
+# CHECK-NEXT: [11]  - Zn3FPP3
+# CHECK-NEXT: [12.0] - Zn3FPP45
+# CHECK-NEXT: [12.1] - Zn3FPP45
+# CHECK-NEXT: [13]  - Zn3FPSt
+# CHECK-NEXT: [14.0] - Zn3LSU
+# CHECK-NEXT: [14.1] - Zn3LSU
+# CHECK-NEXT: [14.2] - Zn3LSU
+# CHECK-NEXT: [15.0] - Zn3Load
+# CHECK-NEXT: [15.1] - Zn3Load
+# CHECK-NEXT: [15.2] - Zn3Load
+# CHECK-NEXT: [16.0] - Zn3Store
+# CHECK-NEXT: [16.1] - Zn3Store
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
+# CHECK-NEXT:  -      -     1.00    -     2.00    -     1.00    -      -      -      -      -      -      -      -      -      -     1.00    -      -     1.00    -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
+# CHECK-NEXT:  -      -     1.00    -     2.00    -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -     1.00    -      -     mulxl	(%rdi), %eax, %ecx
+# CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     addl	%eax, %eax
+
+# CHECK:      Timeline view:
+# CHECK-NEXT:                     0
+# CHECK-NEXT: Index     0123456789
+
+# CHECK:      [0,0]     DeeeeeeeeER   mulxl	(%rdi), %eax, %ecx
+# CHECK-NEXT: [0,1]     D====eE---R   addl	%eax, %eax
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       mulxl	(%rdi), %eax, %ecx
+# CHECK-NEXT: 1.     1     5.0    0.0    3.0       addl	%eax, %eax
+# CHECK-NEXT:        1     3.0    0.5    1.5       <total>
+
+# CHECK:      [1] Code Region
+
+# CHECK:      Iterations:        1
+# CHECK-NEXT: Instructions:      2
+# CHECK-NEXT: Total Cycles:      11
+# CHECK-NEXT: Total uOps:        3
+
+# CHECK:      Dispatch Width:    6
+# CHECK-NEXT: uOps Per Cycle:    0.27
+# CHECK-NEXT: IPC:               0.18
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  2      8     2.00    *                   mulxq	(%rdi), %rax, %rcx
+# CHECK-NEXT:  1      1     0.25                        addq	%rax, %rax
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - Zn3AGU0
+# CHECK-NEXT: [1]   - Zn3AGU1
+# CHECK-NEXT: [2]   - Zn3AGU2
+# CHECK-NEXT: [3]   - Zn3ALU0
+# CHECK-NEXT: [4]   - Zn3ALU1
+# CHECK-NEXT: [5]   - Zn3ALU2
+# CHECK-NEXT: [6]   - Zn3ALU3
+# CHECK-NEXT: [7]   - Zn3BRU1
+# CHECK-NEXT: [8]   - Zn3FPP0
+# CHECK-NEXT: [9]   - Zn3FPP1
+# CHECK-NEXT: [10]  - Zn3FPP2
+# CHECK-NEXT: [11]  - Zn3FPP3
+# CHECK-NEXT: [12.0] - Zn3FPP45
+# CHECK-NEXT: [12.1] - Zn3FPP45
+# CHECK-NEXT: [13]  - Zn3FPSt
+# CHECK-NEXT: [14.0] - Zn3LSU
+# CHECK-NEXT: [14.1] - Zn3LSU
+# CHECK-NEXT: [14.2] - Zn3LSU
+# CHECK-NEXT: [15.0] - Zn3Load
+# CHECK-NEXT: [15.1] - Zn3Load
+# CHECK-NEXT: [15.2] - Zn3Load
+# CHECK-NEXT: [16.0] - Zn3Store
+# CHECK-NEXT: [16.1] - Zn3Store
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
+# CHECK-NEXT:  -      -     1.00    -     2.00    -     1.00    -      -      -      -      -      -      -      -      -      -     1.00    -      -     1.00    -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
+# CHECK-NEXT:  -      -     1.00    -     2.00    -      -      -      -      -      -      -      -      -      -      -      -     1.00    -      -     1.00    -      -     mulxq	(%rdi), %rax, %rcx
+# CHECK-NEXT:  -      -      -      -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     addq	%rax, %rax
+
+# CHECK:      Timeline view:
+# CHECK-NEXT:                     0
+# CHECK-NEXT: Index     0123456789
+
+# CHECK:      [0,0]     DeeeeeeeeER   mulxq	(%rdi), %rax, %rcx
+# CHECK-NEXT: [0,1]     D====eE---R   addq	%rax, %rax
+
+# CHECK:      Average Wait times (based on the timeline view):
+# CHECK-NEXT: [0]: Executions
+# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# CHECK:            [0]    [1]    [2]    [3]
+# CHECK-NEXT: 0.     1     1.0    1.0    0.0       mulxq	(%rdi), %rax, %rcx
+# CHECK-NEXT: 1.     1     5.0    0.0    3.0       addq	%rax, %rax
+# CHECK-NEXT:        1     3.0    0.5    1.5       <total>


        


More information about the llvm-commits mailing list