[llvm] [X86] Complete AMD znver4 AVX512 zeroing idioms (PR #108740)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 15 00:52:12 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Aiden Grossman (boomanaiden154)
<details>
<summary>Changes</summary>
This patch completes scheduling information for the AVX512 zeroing idioms according to the znver4 software optimization guide.
---
Patch is 115.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/108740.diff
4 Files Affected:
- (modified) llvm/lib/Target/X86/X86ScheduleZnver4.td (+68-15)
- (modified) llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s (+13-13)
- (modified) llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bw.s (+9-9)
- (modified) llvm/test/tools/llvm-mca/X86/Znver4/zero-idioms.s (+355-355)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index 6181ee841dd411..9763b651ff0cae 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -1839,35 +1839,59 @@ def Zn4WriteFZeroIdiom : SchedWriteVariant<[
]>;
// NOTE: XORPSrr, XORPDrr are not zero-cycle!
def : InstRW<[Zn4WriteFZeroIdiom], (instrs VXORPSrr, VXORPDrr,
- VANDNPSrr, VANDNPDrr)>;
+ VXORPSZ128rr,
+ VXORPDZ128rr,
+ VANDNPSrr, VANDNPDrr,
+ VANDNPSZ128rr,
+ VANDNPDZ128rr)>;
def Zn4WriteFZeroIdiomY : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
SchedVar<NoSchedPred, [WriteFLogicY]>
]>;
def : InstRW<[Zn4WriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr,
- VANDNPSYrr, VANDNPDYrr)>;
+ VXORPSZ256rr,
+ VXORPDZ256rr,
+ VANDNPSYrr, VANDNPDYrr,
+ VANDNPSZ256rr,
+ VANDNPDZ256rr)>;
+
+def Zn4WriteFZeroIdiomZ : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteFLogicZ]>
+]>;
+def : InstRW<[Zn4WriteFZeroIdiomZ], (instrs VXORPSZrr, VXORPDZrr,
+ VANDNPSZrr, VANDNPDZrr)>;
def Zn4WriteVZeroIdiomLogicX : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
SchedVar<NoSchedPred, [WriteVecLogicX]>
]>;
// NOTE: PXORrr,PANDNrr are not zero-cycle!
-def : InstRW<[Zn4WriteVZeroIdiomLogicX], (instrs VPXORrr, VPANDNrr)>;
+def : InstRW<[Zn4WriteVZeroIdiomLogicX], (instrs VPXORrr,
+ VPXORDZ128rr,
+ VPXORQZ128rr,
+ VPANDNrr,
+ VPANDNDZ128rr,
+ VPANDNQZ128rr)>;
-// TODO: This should be extended to incorporate all of the AVX512 zeroing
-// idioms that can be executed by the renamer.
-def Zn4WriteVZeroIdiomLogicZ : SchedWriteVariant<[
+def Zn4WriteVZeroIdiomLogicY : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
- SchedVar<NoSchedPred, [WriteVecLogicZ]>
+ SchedVar<NoSchedPred, [WriteVecLogicY]>
]>;
-def : InstRW<[Zn4WriteVZeroIdiomLogicZ], (instrs VPXORDZrr)>;
+def : InstRW<[Zn4WriteVZeroIdiomLogicY], (instrs VPXORYrr,
+ VPXORDZ256rr,
+ VPXORQZ256rr,
+ VPANDNYrr,
+ VPANDNDZ256rr,
+ VPANDNQZ256rr)>;
-def Zn4WriteVZeroIdiomLogicY : SchedWriteVariant<[
+def Zn4WriteVZeroIdiomLogicZ : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
- SchedVar<NoSchedPred, [WriteVecLogicY]>
+ SchedVar<NoSchedPred, [WriteVecLogicZ]>
]>;
-def : InstRW<[Zn4WriteVZeroIdiomLogicY], (instrs VPXORYrr, VPANDNYrr)>;
+def : InstRW<[Zn4WriteVZeroIdiomLogicZ], (instrs VPXORDZrr, VPXORQZrr,
+ VPANDNDZrr, VPANDNQZrr)>;
def Zn4WriteVZeroIdiomALUX : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
@@ -1877,7 +1901,10 @@ def Zn4WriteVZeroIdiomALUX : SchedWriteVariant<[
// PCMPGTBrr, PCMPGTWrr, PCMPGTDrr, PCMPGTQrr are not zero-cycle!
def : InstRW<[Zn4WriteVZeroIdiomALUX],
(instrs VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
- VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr)>;
+ VPSUBBZ128rr, VPSUBWZ128rr, VPSUBDZ128rr, VPSUBQZ128rr,
+ VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
+ VPCMPGTBZ128rr, VPCMPGTWZ128rr,
+ VPCMPGTDZ128rr, VPCMPGTQZ128rr)>;
def Zn4WriteVZeroIdiomALUY : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
@@ -1885,7 +1912,18 @@ def Zn4WriteVZeroIdiomALUY : SchedWriteVariant<[
]>;
def : InstRW<[Zn4WriteVZeroIdiomALUY],
(instrs VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
- VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr)>;
+ VPSUBBZ256rr, VPSUBWZ256rr, VPSUBDZ256rr, VPSUBQZ256rr,
+ VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr,
+ VPCMPGTBZ256rr, VPCMPGTWZ256rr,
+ VPCMPGTDZ256rr, VPCMPGTQZ256rr)>;
+
+def Zn4WriteVZeroIdiomALUZ : SchedWriteVariant<[ // Z counterpart of the ALUX/ALUY variants.
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecALUZ]>
+]>;
+def : InstRW<[Zn4WriteVZeroIdiomALUZ],
+ (instrs VPSUBBZrr, VPSUBWZrr, VPSUBDZrr, VPSUBQZrr,
+ VPCMPGTBZrr, VPCMPGTWZrr, VPCMPGTDZrr, VPCMPGTQZrr)>;
def : IsZeroIdiomFunction<[
// GPR Zero-idioms.
@@ -1940,9 +1978,24 @@ def : IsZeroIdiomFunction<[
], ZeroIdiomPredicate>,
// AVX ZMM Zero-idioms.
- // TODO: This should be expanded to incorporate all AVX512 zeroing idioms.
DepBreakingClass<[
- VPXORDZrr
+ // fp variants.
+ VXORPSZrr, VXORPDZrr,
+ VXORPSZ128rr, VXORPDZ128rr, VXORPSZ256rr, VXORPDZ256rr,
+ VANDNPSZrr, VANDNPDZrr,
+ VANDNPSZ128rr, VANDNPDZ128rr, VANDNPSZ256rr, VANDNPDZ256rr,
+
+ // int variants.
+ VPCMPGTBZrr, VPCMPGTWZrr, VPCMPGTDZrr, VPCMPGTQZrr,
+ VPCMPGTBZ128rr, VPCMPGTWZ128rr, VPCMPGTDZ128rr, VPCMPGTQZ128rr,
+ VPCMPGTBZ256rr, VPCMPGTWZ256rr, VPCMPGTDZ256rr, VPCMPGTQZ256rr,
+ VPANDNDZrr, VPANDNQZrr,
+ VPANDNDZ128rr, VPANDNQZ128rr, VPANDNDZ256rr, VPANDNQZ256rr,
+ VPXORDZrr, VPXORQZrr,
+ VPXORDZ128rr, VPXORQZ128rr, VPXORDZ256rr, VPXORQZ256rr,
+ VPSUBBZrr, VPSUBWZrr, VPSUBDZrr, VPSUBQZrr,
+ VPSUBBZ128rr, VPSUBWZ128rr, VPSUBDZ128rr, VPSUBQZ128rr,
+ VPSUBBZ256rr, VPSUBWZ256rr, VPSUBDZ256rr, VPSUBQZ256rr,
], ZeroIdiomPredicate>,
]>;
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s
index 6e52eddd9a8f5e..0c4c6567680017 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s
@@ -1609,13 +1609,13 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 1 1 0.50 vpcmpeqq %zmm0, %zmm1, %k2 {%k3}
# CHECK-NEXT: 1 8 0.50 * vpcmpeqq (%rax), %zmm1, %k2 {%k3}
# CHECK-NEXT: 1 8 0.50 * vpcmpeqq (%rax){1to8}, %zmm1, %k2 {%k3}
-# CHECK-NEXT: 1 1 0.50 vpcmpgtd %zmm0, %zmm1, %k2
+# CHECK-NEXT: 1 1 0.25 vpcmpgtd %zmm0, %zmm1, %k2
# CHECK-NEXT: 1 8 0.50 * vpcmpgtd (%rax), %zmm1, %k2
# CHECK-NEXT: 1 8 0.50 * vpcmpgtd (%rax){1to16}, %zmm1, %k2
# CHECK-NEXT: 1 1 0.50 vpcmpgtd %zmm0, %zmm1, %k2 {%k3}
# CHECK-NEXT: 1 8 0.50 * vpcmpgtd (%rax), %zmm1, %k2 {%k3}
# CHECK-NEXT: 1 8 0.50 * vpcmpgtd (%rax){1to16}, %zmm1, %k2 {%k3}
-# CHECK-NEXT: 1 1 0.50 vpcmpgtq %zmm0, %zmm1, %k2
+# CHECK-NEXT: 1 1 0.25 vpcmpgtq %zmm0, %zmm1, %k2
# CHECK-NEXT: 1 8 0.50 * vpcmpgtq (%rax), %zmm1, %k2
# CHECK-NEXT: 1 8 0.50 * vpcmpgtq (%rax){1to8}, %zmm1, %k2
# CHECK-NEXT: 1 1 0.50 vpcmpgtq %zmm0, %zmm1, %k2 {%k3}
@@ -1815,7 +1815,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 1 1 1.00 vpshufd $0, %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 8 1.00 * vpshufd $0, (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: 1 8 1.00 * vpshufd $0, (%rax){1to16}, %zmm19 {%k1} {z}
-# CHECK-NEXT: 1 1 0.50 vpsubd %zmm16, %zmm17, %zmm19
+# CHECK-NEXT: 1 1 0.25 vpsubd %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 1 8 0.50 * vpsubd (%rax), %zmm17, %zmm19
# CHECK-NEXT: 1 8 0.50 * vpsubd (%rax){1to16}, %zmm17, %zmm19
# CHECK-NEXT: 1 1 0.50 vpsubd %zmm16, %zmm17, %zmm19 {%k1}
@@ -1824,7 +1824,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 1 1 0.50 vpsubd %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 8 0.50 * vpsubd (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 8 0.50 * vpsubd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
-# CHECK-NEXT: 1 1 0.50 vpsubq %zmm16, %zmm17, %zmm19
+# CHECK-NEXT: 1 1 0.25 vpsubq %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 1 8 0.50 * vpsubq (%rax), %zmm17, %zmm19
# CHECK-NEXT: 1 8 0.50 * vpsubq (%rax){1to8}, %zmm17, %zmm19
# CHECK-NEXT: 1 1 0.50 vpsubq %zmm16, %zmm17, %zmm19 {%k1}
@@ -1939,7 +1939,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 1 22 5.00 * vsqrtss (%rax), %xmm17, %xmm19 {%k1}
# CHECK-NEXT: 1 15 5.00 vsqrtss %xmm16, %xmm17, %xmm19 {%k1} {z}
# CHECK-NEXT: 1 22 5.00 * vsqrtss (%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: 1 1 0.50 vpsubd %zmm16, %zmm17, %zmm19
+# CHECK-NEXT: 1 1 0.25 vpsubd %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 1 8 0.50 * vpsubd (%rax), %zmm17, %zmm19
# CHECK-NEXT: 1 8 0.50 * vpsubd (%rax){1to16}, %zmm17, %zmm19
# CHECK-NEXT: 1 1 0.50 vpsubd %zmm16, %zmm17, %zmm19 {%k1}
@@ -1948,7 +1948,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 1 1 0.50 vpsubd %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 8 0.50 * vpsubd (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 1 8 0.50 * vpsubd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
-# CHECK-NEXT: 1 1 0.50 vpsubq %zmm16, %zmm17, %zmm19
+# CHECK-NEXT: 1 1 0.25 vpsubq %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 1 8 0.50 * vpsubq (%rax), %zmm17, %zmm19
# CHECK-NEXT: 1 8 0.50 * vpsubq (%rax){1to8}, %zmm17, %zmm19
# CHECK-NEXT: 1 1 0.50 vpsubq %zmm16, %zmm17, %zmm19 {%k1}
@@ -2065,7 +2065,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: 5.33 5.33 5.33 - - - - - 221.00 1060.50 618.00 352.50 297.00 297.00 17.00 205.33 205.33 205.33 194.33 194.33 194.33 16.50 16.50
+# CHECK-NEXT: 5.33 5.33 5.33 - - - - - 219.50 1059.00 616.50 351.00 297.00 297.00 17.00 205.33 205.33 205.33 194.33 194.33 194.33 16.50 16.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
@@ -2590,13 +2590,13 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - vpcmpeqq %zmm0, %zmm1, %k2 {%k3}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpeqq (%rax), %zmm1, %k2 {%k3}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpeqq (%rax){1to8}, %zmm1, %k2 {%k3}
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - vpcmpgtd %zmm0, %zmm1, %k2
+# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpcmpgtd %zmm0, %zmm1, %k2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpgtd (%rax), %zmm1, %k2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpgtd (%rax){1to16}, %zmm1, %k2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - vpcmpgtd %zmm0, %zmm1, %k2 {%k3}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpgtd (%rax), %zmm1, %k2 {%k3}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpgtd (%rax){1to16}, %zmm1, %k2 {%k3}
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - vpcmpgtq %zmm0, %zmm1, %k2
+# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpcmpgtq %zmm0, %zmm1, %k2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpgtq (%rax), %zmm1, %k2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpcmpgtq (%rax){1to8}, %zmm1, %k2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - vpcmpgtq %zmm0, %zmm1, %k2 {%k3}
@@ -2796,7 +2796,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - - - - - - - - - - - - vpshufd $0, %zmm16, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpshufd $0, (%rax), %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - - 1.00 1.00 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpshufd $0, (%rax){1to16}, %zmm19 {%k1} {z}
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - vpsubd %zmm16, %zmm17, %zmm19
+# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpsubd %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsubd (%rax), %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsubd (%rax){1to16}, %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - vpsubd %zmm16, %zmm17, %zmm19 {%k1}
@@ -2805,7 +2805,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - vpsubd %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsubd (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsubd (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - vpsubq %zmm16, %zmm17, %zmm19
+# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpsubq %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsubq (%rax), %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpsubq (%rax){1to8}, %zmm17, %zmm19
# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - vpsubq %zmm16, %zmm17, %zmm19 {%k1}
@@ -2920,7 +2920,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - - - - - - - - 5.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vsqrtss (%rax), %xmm17, %xmm19 {%k1}
# CHECK-NEXT: - - - - - - - - - 5.00 - - - - - - - - - - - - - vsqrtss %xmm16, %xmm17, %xmm19 {%k1} {z}
# CHECK-NEXT: - - - - - - - - - 5.00 - - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vsqrtss (%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 0.50 0.50 - - - - - - - - - - - vpsubd %zmm16, %zmm17, %zmm19
+# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpsubd %zmm16, %...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/108740
More information about the llvm-commits mailing list