[llvm] d4d459e - [X86] AMD Zen 3: MULX w/ mem operand has the same throughput as with reg op
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 27 03:27:45 PDT 2021
Author: Roman Lebedev
Date: 2021-08-27T13:27:05+03:00
New Revision: d4d459e7475b4bb0d15280f12ed669342fa5edcd
URL: https://github.com/llvm/llvm-project/commit/d4d459e7475b4bb0d15280f12ed669342fa5edcd
DIFF: https://github.com/llvm/llvm-project/commit/d4d459e7475b4bb0d15280f12ed669342fa5edcd.diff
LOG: [X86] AMD Zen 3: MULX w/ mem operand has the same throughput as with reg op
llvm-exegesis is faulty: when measuring inverse throughput (throughput^-1),
it sometimes produces snippets that have loop-carried dependencies,
which must be what caused me to measure it incorrectly originally.
After looking much more carefully, the inverse throughput of MULX w/ mem operand
should match that of MULX w/ reg op,
as per llvm-exegesis measurements.
Added:
Modified:
llvm/lib/Target/X86/X86ScheduleZnver3.td
llvm/test/tools/llvm-mca/X86/Znver3/mulx-hi-read-advance.s
llvm/test/tools/llvm-mca/X86/Znver3/mulx-read-advance.s
llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver3.td b/llvm/lib/Target/X86/X86ScheduleZnver3.td
index 86f1b285fec2..be07c069aae1 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver3.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver3.td
@@ -618,30 +618,10 @@ defm : Zn3WriteResIntPair<WriteIMul16Imm, [Zn3Multiplier], 4, [4], 2>; // Intege
defm : Zn3WriteResIntPair<WriteIMul16Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 16-bit multiplication by register.
defm : Zn3WriteResIntPair<WriteIMul32, [Zn3Multiplier], 3, [3], 2>; // Integer 32-bit multiplication.
defm : Zn3WriteResIntPair<WriteMULX32, [Zn3Multiplier], 3, [1], 2>; // Integer 32-bit Unsigned Multiply Without Affecting Flags.
-
-def Zn3MULX32rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3Multiplier]> {
- let Latency = !add(Znver3Model.LoadLatency, 3);
- let ResourceCycles = [1, 1, 2];
- let NumMicroOps = 2;
-}
-def : InstRW<[Zn3MULX32rm, WriteIMulHLd,
- ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
- ReadAfterLd], (instrs MULX32rm)>;
-
defm : Zn3WriteResIntPair<WriteIMul32Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by immediate.
defm : Zn3WriteResIntPair<WriteIMul32Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by register.
defm : Zn3WriteResIntPair<WriteIMul64, [Zn3Multiplier], 3, [3], 2>; // Integer 64-bit multiplication.
defm : Zn3WriteResIntPair<WriteMULX64, [Zn3Multiplier], 3, [1], 2>; // Integer 32-bit Unsigned Multiply Without Affecting Flags.
-
-def Zn3MULX64rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3Multiplier]> {
- let Latency = !add(Znver3Model.LoadLatency, 3);
- let ResourceCycles = [1, 1, 2];
- let NumMicroOps = 2;
-}
-def : InstRW<[Zn3MULX64rm, WriteIMulHLd,
- ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
- ReadAfterLd], (instrs MULX64rm)>;
-
defm : Zn3WriteResIntPair<WriteIMul64Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by immediate.
defm : Zn3WriteResIntPair<WriteIMul64Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by register.
defm : Zn3WriteResInt<WriteIMulHLd, [], !add(4, Znver3Model.LoadLatency), [], 0>; // Integer multiplication, high part.
diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-hi-read-advance.s b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-hi-read-advance.s
index b19cf61f3a58..12d6f399d429 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-hi-read-advance.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-hi-read-advance.s
@@ -17,13 +17,13 @@ add %rax, %rax
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 12
+# CHECK-NEXT: Total Cycles: 11
# CHECK-NEXT: Total uOps: 3
# CHECK: Dispatch Width: 6
-# CHECK-NEXT: uOps Per Cycle: 0.25
-# CHECK-NEXT: IPC: 0.17
-# CHECK-NEXT: Block RThroughput: 2.0
+# CHECK-NEXT: uOps Per Cycle: 0.27
+# CHECK-NEXT: IPC: 0.18
+# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
@@ -34,7 +34,7 @@ add %rax, %rax
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 8 2.00 * mulxl (%rdi), %eax, %ecx
+# CHECK-NEXT: 2 8 1.00 * mulxl (%rdi), %eax, %ecx
# CHECK-NEXT: 1 1 0.25 addl %eax, %eax
# CHECK: Resources:
@@ -64,19 +64,19 @@ add %rax, %rax
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: - - 1.00 - 2.00 - 1.00 - - - - - - - - - - 1.00 - - 1.00 - -
+# CHECK-NEXT: - - 1.00 - 1.00 - 1.00 - - - - - - - - - - 1.00 - - 1.00 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
-# CHECK-NEXT: - - 1.00 - 2.00 - - - - - - - - - - - - 1.00 - - 1.00 - - mulxl (%rdi), %eax, %ecx
+# CHECK-NEXT: - - 1.00 - 1.00 - - - - - - - - - - - - 1.00 - - 1.00 - - mulxl (%rdi), %eax, %ecx
# CHECK-NEXT: - - - - - - 1.00 - - - - - - - - - - - - - - - - addl %eax, %eax
# CHECK: Timeline view:
-# CHECK-NEXT: 01
+# CHECK-NEXT: 0
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeeeeeeeER. mulxl (%rdi), %eax, %ecx
-# CHECK-NEXT: [0,1] D========eER addl %eax, %eax
+# CHECK: [0,0] DeeeeeeeeER mulxl (%rdi), %eax, %ecx
+# CHECK-NEXT: [0,1] D=======eER addl %eax, %eax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -86,20 +86,20 @@ add %rax, %rax
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 mulxl (%rdi), %eax, %ecx
-# CHECK-NEXT: 1. 1 9.0 0.0 0.0 addl %eax, %eax
-# CHECK-NEXT: 1 5.0 0.5 0.0 <total>
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 addl %eax, %eax
+# CHECK-NEXT: 1 4.5 0.5 0.0 <total>
# CHECK: [1] Code Region
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 12
+# CHECK-NEXT: Total Cycles: 11
# CHECK-NEXT: Total uOps: 3
# CHECK: Dispatch Width: 6
-# CHECK-NEXT: uOps Per Cycle: 0.25
-# CHECK-NEXT: IPC: 0.17
-# CHECK-NEXT: Block RThroughput: 2.0
+# CHECK-NEXT: uOps Per Cycle: 0.27
+# CHECK-NEXT: IPC: 0.18
+# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
@@ -110,7 +110,7 @@ add %rax, %rax
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 8 2.00 * mulxq (%rdi), %rax, %rcx
+# CHECK-NEXT: 2 8 1.00 * mulxq (%rdi), %rax, %rcx
# CHECK-NEXT: 1 1 0.25 addq %rax, %rax
# CHECK: Resources:
@@ -140,19 +140,19 @@ add %rax, %rax
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: - - 1.00 - 2.00 - 1.00 - - - - - - - - - - 1.00 - - 1.00 - -
+# CHECK-NEXT: - - 1.00 - 1.00 - 1.00 - - - - - - - - - - 1.00 - - 1.00 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
-# CHECK-NEXT: - - 1.00 - 2.00 - - - - - - - - - - - - 1.00 - - 1.00 - - mulxq (%rdi), %rax, %rcx
+# CHECK-NEXT: - - 1.00 - 1.00 - - - - - - - - - - - - 1.00 - - 1.00 - - mulxq (%rdi), %rax, %rcx
# CHECK-NEXT: - - - - - - 1.00 - - - - - - - - - - - - - - - - addq %rax, %rax
# CHECK: Timeline view:
-# CHECK-NEXT: 01
+# CHECK-NEXT: 0
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeeeeeeeER. mulxq (%rdi), %rax, %rcx
-# CHECK-NEXT: [0,1] D========eER addq %rax, %rax
+# CHECK: [0,0] DeeeeeeeeER mulxq (%rdi), %rax, %rcx
+# CHECK-NEXT: [0,1] D=======eER addq %rax, %rax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -162,5 +162,5 @@ add %rax, %rax
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 mulxq (%rdi), %rax, %rcx
-# CHECK-NEXT: 1. 1 9.0 0.0 0.0 addq %rax, %rax
-# CHECK-NEXT: 1 5.0 0.5 0.0 <total>
+# CHECK-NEXT: 1. 1 8.0 0.0 0.0 addq %rax, %rax
+# CHECK-NEXT: 1 4.5 0.5 0.0 <total>
diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-read-advance.s b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-read-advance.s
index bbc5cfa39870..13ef5bcb11ca 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-read-advance.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-read-advance.s
@@ -15,13 +15,13 @@ mulxq (%rdi), %rax, %rdx
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 14
+# CHECK-NEXT: Total Cycles: 15
# CHECK-NEXT: Total uOps: 4
# CHECK: Dispatch Width: 6
-# CHECK-NEXT: uOps Per Cycle: 0.29
-# CHECK-NEXT: IPC: 0.14
-# CHECK-NEXT: Block RThroughput: 2.0
+# CHECK-NEXT: uOps Per Cycle: 0.27
+# CHECK-NEXT: IPC: 0.13
+# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
@@ -32,7 +32,7 @@ mulxq (%rdi), %rax, %rdx
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 8 2.00 * mulxl (%rdi), %eax, %edx
+# CHECK-NEXT: 2 8 1.00 * mulxl (%rdi), %eax, %edx
# CHECK: Resources:
# CHECK-NEXT: [0] - Zn3AGU0
@@ -61,18 +61,18 @@ mulxq (%rdi), %rax, %rdx
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: - 0.50 0.50 - 2.00 - - - - - - - - - - - 0.50 0.50 - 0.50 0.50 - -
+# CHECK-NEXT: - 0.50 0.50 - 1.00 - - - - - - - - - - - 0.50 0.50 - 0.50 0.50 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
-# CHECK-NEXT: - 0.50 0.50 - 2.00 - - - - - - - - - - - 0.50 0.50 - 0.50 0.50 - - mulxl (%rdi), %eax, %edx
+# CHECK-NEXT: - 0.50 0.50 - 1.00 - - - - - - - - - - - 0.50 0.50 - 0.50 0.50 - - mulxl (%rdi), %eax, %edx
# CHECK: Timeline view:
-# CHECK-NEXT: 0123
+# CHECK-NEXT: 01234
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeeeeeeeER . mulxl (%rdi), %eax, %edx
-# CHECK-NEXT: [1,0] D===eeeeeeeeER mulxl (%rdi), %eax, %edx
+# CHECK: [0,0] DeeeeeeeeER . mulxl (%rdi), %eax, %edx
+# CHECK-NEXT: [1,0] D====eeeeeeeeER mulxl (%rdi), %eax, %edx
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -81,19 +81,19 @@ mulxq (%rdi), %rax, %rdx
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 2 2.5 0.5 0.0 mulxl (%rdi), %eax, %edx
+# CHECK-NEXT: 0. 2 3.0 0.5 0.0 mulxl (%rdi), %eax, %edx
# CHECK: [1] Code Region
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 2
-# CHECK-NEXT: Total Cycles: 14
+# CHECK-NEXT: Total Cycles: 15
# CHECK-NEXT: Total uOps: 4
# CHECK: Dispatch Width: 6
-# CHECK-NEXT: uOps Per Cycle: 0.29
-# CHECK-NEXT: IPC: 0.14
-# CHECK-NEXT: Block RThroughput: 2.0
+# CHECK-NEXT: uOps Per Cycle: 0.27
+# CHECK-NEXT: IPC: 0.13
+# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
@@ -104,7 +104,7 @@ mulxq (%rdi), %rax, %rdx
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 2 8 2.00 * mulxq (%rdi), %rax, %rdx
+# CHECK-NEXT: 2 8 1.00 * mulxq (%rdi), %rax, %rdx
# CHECK: Resources:
# CHECK-NEXT: [0] - Zn3AGU0
@@ -133,18 +133,18 @@ mulxq (%rdi), %rax, %rdx
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: - 0.50 0.50 - 2.00 - - - - - - - - - - - 0.50 0.50 - 0.50 0.50 - -
+# CHECK-NEXT: - 0.50 0.50 - 1.00 - - - - - - - - - - - 0.50 0.50 - 0.50 0.50 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
-# CHECK-NEXT: - 0.50 0.50 - 2.00 - - - - - - - - - - - 0.50 0.50 - 0.50 0.50 - - mulxq (%rdi), %rax, %rdx
+# CHECK-NEXT: - 0.50 0.50 - 1.00 - - - - - - - - - - - 0.50 0.50 - 0.50 0.50 - - mulxq (%rdi), %rax, %rdx
# CHECK: Timeline view:
-# CHECK-NEXT: 0123
+# CHECK-NEXT: 01234
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DeeeeeeeeER . mulxq (%rdi), %rax, %rdx
-# CHECK-NEXT: [1,0] D===eeeeeeeeER mulxq (%rdi), %rax, %rdx
+# CHECK: [0,0] DeeeeeeeeER . mulxq (%rdi), %rax, %rdx
+# CHECK-NEXT: [1,0] D====eeeeeeeeER mulxq (%rdi), %rax, %rdx
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -153,4 +153,4 @@ mulxq (%rdi), %rax, %rdx
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
-# CHECK-NEXT: 0. 2 2.5 0.5 0.0 mulxq (%rdi), %rax, %rdx
+# CHECK-NEXT: 0. 2 3.0 0.5 0.0 mulxq (%rdi), %rax, %rdx
diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s
index 1c2ccfc7cf83..8d00c99982b0 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s
@@ -63,9 +63,9 @@ shrx %rax, (%rbx), %rcx
# CHECK-NEXT: 1 1 0.50 bzhiq %rax, %rbx, %rcx
# CHECK-NEXT: 2 5 0.50 * bzhiq %rax, (%rbx), %rcx
# CHECK-NEXT: 2 4 1.00 mulxl %eax, %ebx, %ecx
-# CHECK-NEXT: 2 8 2.00 * mulxl (%rax), %ebx, %ecx
+# CHECK-NEXT: 2 8 1.00 * mulxl (%rax), %ebx, %ecx
# CHECK-NEXT: 2 4 1.00 mulxq %rax, %rbx, %rcx
-# CHECK-NEXT: 2 8 2.00 * mulxq (%rax), %rbx, %rcx
+# CHECK-NEXT: 2 8 1.00 * mulxq (%rax), %rbx, %rcx
# CHECK-NEXT: 1 3 1.00 pdepl %eax, %ebx, %ecx
# CHECK-NEXT: 1 5 0.33 * pdepl (%rax), %ebx, %ecx
# CHECK-NEXT: 1 3 1.00 pdepq %rax, %rbx, %rcx
@@ -118,7 +118,7 @@ shrx %rax, (%rbx), %rcx
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: 5.33 5.33 5.33 1.00 21.00 11.00 1.00 - - - - - - - - 5.33 5.33 5.33 5.33 5.33 5.33 - -
+# CHECK-NEXT: 5.33 5.33 5.33 1.00 19.00 11.00 1.00 - - - - - - - - 5.33 5.33 5.33 5.33 5.33 5.33 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
@@ -127,9 +127,9 @@ shrx %rax, (%rbx), %rcx
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - - - - - - - - - - - - bzhiq %rax, %rbx, %rcx
# CHECK-NEXT: 0.33 0.33 0.33 - 0.50 0.50 - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - bzhiq %rax, (%rbx), %rcx
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - mulxl %eax, %ebx, %ecx
-# CHECK-NEXT: 0.33 0.33 0.33 - 2.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - mulxl (%rax), %ebx, %ecx
+# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - mulxl (%rax), %ebx, %ecx
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - mulxq %rax, %rbx, %rcx
-# CHECK-NEXT: 0.33 0.33 0.33 - 2.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - mulxq (%rax), %rbx, %rcx
+# CHECK-NEXT: 0.33 0.33 0.33 - 1.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - mulxq (%rax), %rbx, %rcx
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - pdepl %eax, %ebx, %ecx
# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - pdepl (%rax), %ebx, %ecx
# CHECK-NEXT: - - - - 1.00 - - - - - - - - - - - - - - - - - - pdepq %rax, %rbx, %rcx
More information about the llvm-commits mailing list