[llvm] [X86] Complete AMD znver4 AVX512 zeroing idioms (PR #108740)

via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 15 00:52:12 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Aiden Grossman (boomanaiden154)

<details>
<summary>Changes</summary>

This patch completes scheduling information for the AVX512 zeroing idioms according to the znver4 software optimization guide.

---

Patch is 115.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/108740.diff


4 Files Affected:

- (modified) llvm/lib/Target/X86/X86ScheduleZnver4.td (+68-15) 
- (modified) llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s (+13-13) 
- (modified) llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512bw.s (+9-9) 
- (modified) llvm/test/tools/llvm-mca/X86/Znver4/zero-idioms.s (+355-355) 


``````````diff
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index 6181ee841dd411..9763b651ff0cae 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -1839,35 +1839,59 @@ def Zn4WriteFZeroIdiom : SchedWriteVariant<[
 ]>;
 // NOTE: XORPSrr, XORPDrr are not zero-cycle!
 def : InstRW<[Zn4WriteFZeroIdiom], (instrs VXORPSrr, VXORPDrr,
-                                           VANDNPSrr, VANDNPDrr)>;
+                                           VXORPSZ128rr,
+                                           VXORPDZ128rr,
+                                           VANDNPSrr, VANDNPDrr,
+                                           VANDNPSZ128rr,
+                                           VANDNPDZ128rr)>;
 
 def Zn4WriteFZeroIdiomY : SchedWriteVariant<[
     SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
     SchedVar<NoSchedPred,                          [WriteFLogicY]>
 ]>;
 def : InstRW<[Zn4WriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr,
-                                            VANDNPSYrr, VANDNPDYrr)>;
+                                            VXORPSZ256rr,
+                                            VXORPDZ256rr,
+                                            VANDNPSYrr, VANDNPDYrr,
+                                            VANDNPSZ256rr,
+                                            VANDNPDZ256rr)>;
+
+def Zn4WriteFZeroIdiomZ : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteFLogicZ]>
+]>;
+def : InstRW<[Zn4WriteFZeroIdiomZ], (instrs VXORPSZrr, VXORPDZrr,
+                                            VANDNPSZrr, VANDNPDZrr)>;
 
 def Zn4WriteVZeroIdiomLogicX : SchedWriteVariant<[
     SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
     SchedVar<NoSchedPred,                          [WriteVecLogicX]>
 ]>;
 // NOTE: PXORrr,PANDNrr are not zero-cycle!
-def : InstRW<[Zn4WriteVZeroIdiomLogicX], (instrs VPXORrr, VPANDNrr)>;
+def : InstRW<[Zn4WriteVZeroIdiomLogicX], (instrs VPXORrr,
+                                                 VPXORDZ128rr,
+                                                 VPXORQZ128rr,
+                                                 VPANDNrr,
+                                                 VPANDNDZ128rr,
+                                                 VPANDNQZ128rr)>;
 
-// TODO: This should be extended to incorporate all of the AVX512 zeroing
-// idioms that can be executed by the renamer.
-def Zn4WriteVZeroIdiomLogicZ : SchedWriteVariant<[
+def Zn4WriteVZeroIdiomLogicY : SchedWriteVariant<[
     SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
-    SchedVar<NoSchedPred,                          [WriteVecLogicZ]>
+    SchedVar<NoSchedPred,                          [WriteVecLogicY]>
 ]>;
-def : InstRW<[Zn4WriteVZeroIdiomLogicZ], (instrs VPXORDZrr)>;
+def : InstRW<[Zn4WriteVZeroIdiomLogicY], (instrs VPXORYrr,
+                                                 VPXORDZ256rr,
+                                                 VPXORQZ256rr,
+                                                 VPANDNYrr,
+                                                 VPANDNDZ256rr,
+                                                 VPANDNQZ256rr)>;
 
-def Zn4WriteVZeroIdiomLogicY : SchedWriteVariant<[
+def Zn4WriteVZeroIdiomLogicZ : SchedWriteVariant<[
     SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
-    SchedVar<NoSchedPred,                          [WriteVecLogicY]>
+    SchedVar<NoSchedPred,                          [WriteVecLogicZ]>
 ]>;
-def : InstRW<[Zn4WriteVZeroIdiomLogicY], (instrs VPXORYrr, VPANDNYrr)>;
+def : InstRW<[Zn4WriteVZeroIdiomLogicZ], (instrs VPXORDZrr, VPXORQZrr,
+                                                 VPANDNDZrr, VPANDNQZrr)>;
 
 def Zn4WriteVZeroIdiomALUX : SchedWriteVariant<[
     SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
@@ -1877,7 +1901,10 @@ def Zn4WriteVZeroIdiomALUX : SchedWriteVariant<[
 //       PCMPGTBrr, PCMPGTWrr, PCMPGTDrr, PCMPGTQrr are not zero-cycle!
 def : InstRW<[Zn4WriteVZeroIdiomALUX],
              (instrs VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
-                     VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr)>;
+                     VPSUBBZ128rr, VPSUBWZ128rr, VPSUBDZ128rr, VPSUBQZ128rr,
+                     VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
+                     VPCMPGTBZ128rr, VPCMPGTWZ128rr,
+                     VPCMPGTDZ128rr, VPCMPGTQZ128rr)>;
 
 def Zn4WriteVZeroIdiomALUY : SchedWriteVariant<[
     SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
@@ -1885,7 +1912,18 @@ def Zn4WriteVZeroIdiomALUY : SchedWriteVariant<[
 ]>;
 def : InstRW<[Zn4WriteVZeroIdiomALUY],
              (instrs VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
-                     VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr)>;
+                     VPSUBBZ256rr, VPSUBWZ256rr, VPSUBDZ256rr, VPSUBQZ256rr,
+                     VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr,
+                     VPCMPGTBZ256rr, VPCMPGTWZ256rr,
+                     VPCMPGTDZ256rr, VPCMPGTQZ256rr)>;
+
+def Zn4WriteVZeroIdiomALUZ : SchedWriteVariant<[
+    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
+    SchedVar<NoSchedPred,                          [WriteVecALUZ]>
+]>;
+def : InstRW<[Zn4WriteVZeroIdiomALUY],
+             (instrs VPSUBBZrr, VPSUBWZrr, VPSUBDZrr, VPSUBQZrr,
+                     VPCMPGTBZrr, VPCMPGTWZrr, VPCMPGTDZrr, VPCMPGTQZrr)>;
 
 def : IsZeroIdiomFunction<[
   // GPR Zero-idioms.
@@ -1940,9 +1978,24 @@ def : IsZeroIdiomFunction<[
   ], ZeroIdiomPredicate>,
 
   // AVX ZMM Zero-idioms.
-  // TODO: This should be expanded to incorporate all AVX512 zeroing idioms.
   DepBreakingClass<[
-    VPXORDZrr
+    // fp variants.
+    VXORPSZrr, VXORPDZrr,
+    VXORPSZ128rr, VXORPDZ128rr, VXORPSZ256rr, VXORPDZ256rr,
+    VANDNPSZrr, VANDNPDZrr,
+    VANDNPSZ128rr, VANDNPDZ128rr, VANDNPSZ256rr, VANDNPDZ256rr,
+
+    // int variants.
+    VPCMPGTBZrr, VPCMPGTWZrr, VPCMPGTDZrr, VPCMPGTQZrr,
+    VPCMPGTBZ128rr, VPCMPGTWZ128rr, VPCMPGTDZ128rr, VPCMPGTQZ128rr,
+    VPCMPGTBZ256rr, VPCMPGTWZ256rr, VPCMPGTDZ256rr, VPCMPGTQZ256rr,
+    VPANDNDZrr, VPANDNQZrr,
+    VPANDNDZ128rr, VPANDNQZ128rr, VPANDNDZ256rr, VPANDNQZ256rr,
+    VPXORDZrr, VPXORQZrr,
+    VPXORDZ128rr, VPXORQZ128rr, VPXORDZ256rr, VPXORQZ256rr,
+    VPSUBBZrr, VPSUBWZrr, VPSUBDZrr, VPSUBQZrr,
+    VPSUBBZ128rr, VPSUBWZ128rr, VPSUBDZ128rr, VPSUBQZ128rr,
+    VPSUBBZ256rr, VPSUBWZ256rr, VPSUBDZ256rr, VPSUBQZ256rr,
   ], ZeroIdiomPredicate>,
 ]>;
 
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s
index 6e52eddd9a8f5e..0c4c6567680017 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx512.s
@@ -1609,13 +1609,13 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      1     0.50                        vpcmpeqq	%zmm0, %zmm1, %k2 {%k3}
 # CHECK-NEXT:  1      8     0.50    *                   vpcmpeqq	(%rax), %zmm1, %k2 {%k3}
 # CHECK-NEXT:  1      8     0.50    *                   vpcmpeqq	(%rax){1to8}, %zmm1, %k2 {%k3}
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%zmm0, %zmm1, %k2
+# CHECK-NEXT:  1      1     0.25                        vpcmpgtd	%zmm0, %zmm1, %k2
 # CHECK-NEXT:  1      8     0.50    *                   vpcmpgtd	(%rax), %zmm1, %k2
 # CHECK-NEXT:  1      8     0.50    *                   vpcmpgtd	(%rax){1to16}, %zmm1, %k2
 # CHECK-NEXT:  1      1     0.50                        vpcmpgtd	%zmm0, %zmm1, %k2 {%k3}
 # CHECK-NEXT:  1      8     0.50    *                   vpcmpgtd	(%rax), %zmm1, %k2 {%k3}
 # CHECK-NEXT:  1      8     0.50    *                   vpcmpgtd	(%rax){1to16}, %zmm1, %k2 {%k3}
-# CHECK-NEXT:  1      1     0.50                        vpcmpgtq	%zmm0, %zmm1, %k2
+# CHECK-NEXT:  1      1     0.25                        vpcmpgtq	%zmm0, %zmm1, %k2
 # CHECK-NEXT:  1      8     0.50    *                   vpcmpgtq	(%rax), %zmm1, %k2
 # CHECK-NEXT:  1      8     0.50    *                   vpcmpgtq	(%rax){1to8}, %zmm1, %k2
 # CHECK-NEXT:  1      1     0.50                        vpcmpgtq	%zmm0, %zmm1, %k2 {%k3}
@@ -1815,7 +1815,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      1     1.00                        vpshufd	$0, %zmm16, %zmm19 {%k1} {z}
 # CHECK-NEXT:  1      8     1.00    *                   vpshufd	$0, (%rax), %zmm19 {%k1} {z}
 # CHECK-NEXT:  1      8     1.00    *                   vpshufd	$0, (%rax){1to16}, %zmm19 {%k1} {z}
-# CHECK-NEXT:  1      1     0.50                        vpsubd	%zmm16, %zmm17, %zmm19
+# CHECK-NEXT:  1      1     0.25                        vpsubd	%zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  1      8     0.50    *                   vpsubd	(%rax), %zmm17, %zmm19
 # CHECK-NEXT:  1      8     0.50    *                   vpsubd	(%rax){1to16}, %zmm17, %zmm19
 # CHECK-NEXT:  1      1     0.50                        vpsubd	%zmm16, %zmm17, %zmm19 {%k1}
@@ -1824,7 +1824,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      1     0.50                        vpsubd	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  1      8     0.50    *                   vpsubd	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  1      8     0.50    *                   vpsubd	(%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
-# CHECK-NEXT:  1      1     0.50                        vpsubq	%zmm16, %zmm17, %zmm19
+# CHECK-NEXT:  1      1     0.25                        vpsubq	%zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  1      8     0.50    *                   vpsubq	(%rax), %zmm17, %zmm19
 # CHECK-NEXT:  1      8     0.50    *                   vpsubq	(%rax){1to8}, %zmm17, %zmm19
 # CHECK-NEXT:  1      1     0.50                        vpsubq	%zmm16, %zmm17, %zmm19 {%k1}
@@ -1939,7 +1939,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      22    5.00    *                   vsqrtss	(%rax), %xmm17, %xmm19 {%k1}
 # CHECK-NEXT:  1      15    5.00                        vsqrtss	%xmm16, %xmm17, %xmm19 {%k1} {z}
 # CHECK-NEXT:  1      22    5.00    *                   vsqrtss	(%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  1      1     0.50                        vpsubd	%zmm16, %zmm17, %zmm19
+# CHECK-NEXT:  1      1     0.25                        vpsubd	%zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  1      8     0.50    *                   vpsubd	(%rax), %zmm17, %zmm19
 # CHECK-NEXT:  1      8     0.50    *                   vpsubd	(%rax){1to16}, %zmm17, %zmm19
 # CHECK-NEXT:  1      1     0.50                        vpsubd	%zmm16, %zmm17, %zmm19 {%k1}
@@ -1948,7 +1948,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      1     0.50                        vpsubd	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  1      8     0.50    *                   vpsubd	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  1      8     0.50    *                   vpsubd	(%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
-# CHECK-NEXT:  1      1     0.50                        vpsubq	%zmm16, %zmm17, %zmm19
+# CHECK-NEXT:  1      1     0.25                        vpsubq	%zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  1      8     0.50    *                   vpsubq	(%rax), %zmm17, %zmm19
 # CHECK-NEXT:  1      8     0.50    *                   vpsubq	(%rax){1to8}, %zmm17, %zmm19
 # CHECK-NEXT:  1      1     0.50                        vpsubq	%zmm16, %zmm17, %zmm19 {%k1}
@@ -2065,7 +2065,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: 5.33   5.33   5.33    -      -      -      -      -     221.00 1060.50 618.00 352.50 297.00 297.00 17.00 205.33 205.33 205.33 194.33 194.33 194.33 16.50  16.50
+# CHECK-NEXT: 5.33   5.33   5.33    -      -      -      -      -     219.50 1059.00 616.50 351.00 297.00 297.00 17.00 205.33 205.33 205.33 194.33 194.33 194.33 16.50  16.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
@@ -2590,13 +2590,13 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -      -      -      -      -     vpcmpeqq	%zmm0, %zmm1, %k2 {%k3}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50   0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpcmpeqq	(%rax), %zmm1, %k2 {%k3}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50   0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpcmpeqq	(%rax){1to8}, %zmm1, %k2 {%k3}
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -      -      -      -      -     vpcmpgtd	%zmm0, %zmm1, %k2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -      -      -      -      -      -     vpcmpgtd	%zmm0, %zmm1, %k2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50   0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpcmpgtd	(%rax), %zmm1, %k2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50   0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpcmpgtd	(%rax){1to16}, %zmm1, %k2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -      -      -      -      -     vpcmpgtd	%zmm0, %zmm1, %k2 {%k3}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50   0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpcmpgtd	(%rax), %zmm1, %k2 {%k3}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50   0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpcmpgtd	(%rax){1to16}, %zmm1, %k2 {%k3}
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -      -      -      -      -     vpcmpgtq	%zmm0, %zmm1, %k2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -      -      -      -      -      -     vpcmpgtq	%zmm0, %zmm1, %k2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50   0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpcmpgtq	(%rax), %zmm1, %k2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50   0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpcmpgtq	(%rax){1to8}, %zmm1, %k2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -      -      -      -      -     vpcmpgtq	%zmm0, %zmm1, %k2 {%k3}
@@ -2796,7 +2796,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00   1.00    -      -      -      -      -      -      -      -      -      -      -      -     vpshufd	$0, %zmm16, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00   1.00    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpshufd	$0, (%rax), %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     1.00   1.00    -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpshufd	$0, (%rax){1to16}, %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -      -      -      -      -     vpsubd	%zmm16, %zmm17, %zmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -      -      -      -      -      -     vpsubd	%zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50   0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpsubd	(%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50   0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpsubd	(%rax){1to16}, %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -      -      -      -      -     vpsubd	%zmm16, %zmm17, %zmm19 {%k1}
@@ -2805,7 +2805,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -      -      -      -      -     vpsubd	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50   0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpsubd	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50   0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpsubd	(%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -      -      -      -      -     vpsubq	%zmm16, %zmm17, %zmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -      -      -      -      -      -     vpsubq	%zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50   0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpsubq	(%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50   0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vpsubq	(%rax){1to8}, %zmm17, %zmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -      -      -      -      -     vpsubq	%zmm16, %zmm17, %zmm19 {%k1}
@@ -2920,7 +2920,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     5.00    -      -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vsqrtss	(%rax), %xmm17, %xmm19 {%k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     5.00    -      -      -      -      -      -      -      -      -      -      -      -      -     vsqrtss	%xmm16, %xmm17, %xmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -     5.00    -      -     0.50   0.50    -     0.33   0.33   0.33   0.33   0.33   0.33    -      -     vsqrtss	(%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -      -      -      -      -      -      -      -      -      -      -     vpsubd	%zmm16, %zmm17, %zmm19
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -      -      -      -      -      -      -      -      -      -     vpsubd	%zmm16, %...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/108740


More information about the llvm-commits mailing list