[llvm] [X86] Recognize VPXORDZrr as a zero-idiom on Znver4 (PR #108314)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 11 17:51:24 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Aiden Grossman (boomanaiden154)
<details>
<summary>Changes</summary>
This patch adds scheduling information for VPXORDZrr to the znver4 scheduling model, specifically marking it as a zero-idiom.
This fixes a proximal cause of #108157.
---
Patch is 46.06 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/108314.diff
2 Files Affected:
- (modified) llvm/lib/Target/X86/X86ScheduleZnver4.td (+14)
- (modified) llvm/test/tools/llvm-mca/X86/Znver4/zero-idioms.s (+187-187)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index 420c42928c1c44..6181ee841dd411 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -1855,6 +1855,14 @@ def Zn4WriteVZeroIdiomLogicX : SchedWriteVariant<[
// NOTE: PXORrr,PANDNrr are not zero-cycle!
def : InstRW<[Zn4WriteVZeroIdiomLogicX], (instrs VPXORrr, VPANDNrr)>;
+// TODO: This should be extended to incorporate all of the AVX512 zeroing
+// idioms that can be executed by the renamer.
+def Zn4WriteVZeroIdiomLogicZ : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecLogicZ]>
+]>;
+def : InstRW<[Zn4WriteVZeroIdiomLogicZ], (instrs VPXORDZrr)>;
+
def Zn4WriteVZeroIdiomLogicY : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
SchedVar<NoSchedPred, [WriteVecLogicY]>
@@ -1930,6 +1938,12 @@ def : IsZeroIdiomFunction<[
VPSUBUSBYrr, VPSUBUSWYrr,
VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr
], ZeroIdiomPredicate>,
+
+ // AVX ZMM Zero-idioms.
+ // TODO: This should be expanded to incorporate all AVX512 zeroing idioms.
+ DepBreakingClass<[
+ VPXORDZrr
+ ], ZeroIdiomPredicate>,
]>;
def : IsDepBreakingFunction<[
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/Znver4/zero-idioms.s
index cc3c286a2c638b..b6ebd93f681fe3 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/zero-idioms.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/zero-idioms.s
@@ -161,13 +161,13 @@ vpxorq %zmm19, %zmm19, %zmm21
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 139
-# CHECK-NEXT: Total Cycles: 42
+# CHECK-NEXT: Total Cycles: 40
# CHECK-NEXT: Total uOps: 139
# CHECK: Dispatch Width: 6
-# CHECK-NEXT: uOps Per Cycle: 3.31
-# CHECK-NEXT: IPC: 3.31
-# CHECK-NEXT: Block RThroughput: 25.8
+# CHECK-NEXT: uOps Per Cycle: 3.48
+# CHECK-NEXT: IPC: 3.48
+# CHECK-NEXT: Block RThroughput: 24.8
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
@@ -301,7 +301,7 @@ vpxorq %zmm19, %zmm19, %zmm21
# CHECK-NEXT: 1 1 0.25 vpxorq %xmm19, %xmm19, %xmm19
# CHECK-NEXT: 1 1 0.25 vpxord %ymm19, %ymm19, %ymm19
# CHECK-NEXT: 1 1 0.25 vpxorq %ymm19, %ymm19, %ymm19
-# CHECK-NEXT: 1 1 0.50 vpxord %zmm19, %zmm19, %zmm19
+# CHECK-NEXT: 1 0 0.17 vpxord %zmm19, %zmm19, %zmm19
# CHECK-NEXT: 1 1 0.50 vpxorq %zmm19, %zmm19, %zmm19
# CHECK-NEXT: 1 0 0.17 vxorps %xmm4, %xmm4, %xmm5
# CHECK-NEXT: 1 0 0.17 vxorpd %xmm1, %xmm1, %xmm3
@@ -315,17 +315,17 @@ vpxorq %zmm19, %zmm19, %zmm21
# CHECK-NEXT: 1 1 0.25 vpxorq %xmm19, %xmm19, %xmm21
# CHECK-NEXT: 1 1 0.25 vpxord %ymm19, %ymm19, %ymm21
# CHECK-NEXT: 1 1 0.25 vpxorq %ymm19, %ymm19, %ymm21
-# CHECK-NEXT: 1 1 0.50 vpxord %zmm19, %zmm19, %zmm21
+# CHECK-NEXT: 1 0 0.17 vpxord %zmm19, %zmm19, %zmm21
# CHECK-NEXT: 1 1 0.50 vpxorq %zmm19, %zmm19, %zmm21
# CHECK: Register File statistics:
-# CHECK-NEXT: Total number of mappings created: 65
-# CHECK-NEXT: Max number of mappings used: 45
+# CHECK-NEXT: Total number of mappings created: 63
+# CHECK-NEXT: Max number of mappings used: 43
# CHECK: * Register File #1 -- Zn4FpPRF:
# CHECK-NEXT: Number of physical registers: 192
-# CHECK-NEXT: Total number of mappings created: 65
-# CHECK-NEXT: Max number of mappings used: 45
+# CHECK-NEXT: Total number of mappings created: 63
+# CHECK-NEXT: Max number of mappings used: 43
# CHECK: * Register File #2 -- Zn4IntegerPRF:
# CHECK-NEXT: Number of physical registers: 224
@@ -359,7 +359,7 @@ vpxorq %zmm19, %zmm19, %zmm21
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT: - - - - - - - - 25.00 25.00 27.00 26.00 - - - - - - - - - - -
+# CHECK-NEXT: - - - - - - - - 24.00 25.00 25.00 25.00 - - - - - - - - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
@@ -453,9 +453,9 @@ vpxorq %zmm19, %zmm19, %zmm21
# CHECK-NEXT: - - - - - - - - 1.00 - - - - - - - - - - - - - - vpandnd %xmm19, %xmm19, %xmm19
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - - - - vpandnq %xmm19, %xmm19, %xmm19
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - - - - vpandnd %ymm19, %ymm19, %ymm19
-# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpandnq %ymm19, %ymm19, %ymm19
-# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vpandnd %zmm19, %zmm19, %zmm19
-# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - - - - vpandnq %zmm19, %zmm19, %zmm19
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - - - - vpandnq %ymm19, %ymm19, %ymm19
+# CHECK-NEXT: - - - - - - - - - 2.00 - - - - - - - - - - - - - vpandnd %zmm19, %zmm19, %zmm19
+# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vpandnq %zmm19, %zmm19, %zmm19
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vandnps %xmm2, %xmm2, %xmm5
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vandnpd %xmm1, %xmm1, %xmm5
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vpandn %xmm3, %xmm3, %xmm5
@@ -478,174 +478,174 @@ vpxorq %zmm19, %zmm19, %zmm21
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vxorpd %ymm1, %ymm1, %ymm1
# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - - - - vxorps %zmm2, %zmm2, %zmm2
# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - - - - - - vxorpd %zmm1, %zmm1, %zmm1
-# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - - - - - - pxor %mm2, %mm2
-# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - pxor %xmm2, %xmm2
+# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - pxor %mm2, %mm2
+# CHECK-NEXT: - - - - - - - - 1.00 - - - - - - - - - - - - - - pxor %xmm2, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vpxor %xmm3, %xmm3, %xmm3
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vpxor %ymm3, %ymm3, %ymm3
# CHECK-NEXT: - - - - - - - - 1.00 - - - - - - - - - - - - - - vpxord %xmm19, %xmm19, %xmm19
# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - - - - vpxorq %xmm19, %xmm19, %xmm19
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - - - - - - vpxord %ymm19, %ymm19, %ymm19
# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpxorq %ymm19, %ymm19, %ymm19
-# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vpxord %zmm19, %zmm19, %zmm19
-# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - - - - vpxorq %zmm19, %zmm19, %zmm19
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vpxord %zmm19, %zmm19, %zmm19
+# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - - - - - - vpxorq %zmm19, %zmm19, %zmm19
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vxorps %xmm4, %xmm4, %xmm5
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vxorpd %xmm1, %xmm1, %xmm3
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vxorps %ymm4, %ymm4, %ymm5
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vxorpd %ymm1, %ymm1, %ymm3
-# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vxorps %zmm4, %zmm4, %zmm5
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - - - - - - vxorpd %zmm1, %zmm1, %zmm3
+# CHECK-NEXT: - - - - - - - - - 2.00 - - - - - - - - - - - - - vxorps %zmm4, %zmm4, %zmm5
+# CHECK-NEXT: - - - - - - - - 2.00 - - - - - - - - - - - - - - vxorpd %zmm1, %zmm1, %zmm3
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vpxor %xmm3, %xmm3, %xmm5
# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vpxor %ymm3, %ymm3, %ymm5
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - - - - - - vpxord %xmm19, %xmm19, %xmm21
-# CHECK-NEXT: - - - - - - - - - 1.00 - - - - - - - - - - - - - vpxorq %xmm19, %xmm19, %xmm21
-# CHECK-NEXT: - - - - - - - - 1.00 - - - - - - - - - - - - - - vpxord %ymm19, %ymm19, %ymm21
-# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - - - - vpxorq %ymm19, %ymm19, %ymm21
-# CHECK-NEXT: - - - - - - - - - - 2.00 - - - - - - - - - - - - vpxord %zmm19, %zmm19, %zmm21
-# CHECK-NEXT: - - - - - - - - - 2.00 - - - - - - - - - - - - - vpxorq %zmm19, %zmm19, %zmm21
+# CHECK-NEXT: - - - - - - - - 1.00 - - - - - - - - - - - - - - vpxorq %xmm19, %xmm19, %xmm21
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - - - - - - - - - vpxord %ymm19, %ymm19, %ymm21
+# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - - - - - - vpxorq %ymm19, %ymm19, %ymm21
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vpxord %zmm19, %zmm19, %zmm21
+# CHECK-NEXT: - - - - - - - - - - - 2.00 - - - - - - - - - - - vpxorq %zmm19, %zmm19, %zmm21
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789
-# CHECK-NEXT: Index 0123456789 0123456789 01
-
-# CHECK: [0,0] DR . . . . . . . .. subl %eax, %eax
-# CHECK-NEXT: [0,1] DR . . . . . . . .. subq %rax, %rax
-# CHECK-NEXT: [0,2] DR . . . . . . . .. xorl %eax, %eax
-# CHECK-NEXT: [0,3] DR . . . . . . . .. xorq %rax, %rax
-# CHECK-NEXT: [0,4] DeER . . . . . . . .. pcmpgtb %mm2, %mm2
-# CHECK-NEXT: [0,5] D=eER. . . . . . . .. pcmpgtd %mm2, %mm2
-# CHECK-NEXT: [0,6] .D=eER . . . . . . .. pcmpgtw %mm2, %mm2
-# CHECK-NEXT: [0,7] .DeE-R . . . . . . .. pcmpgtb %xmm2, %xmm2
-# CHECK-NEXT: [0,8] .DeE-R . . . . . . .. pcmpgtd %xmm2, %xmm2
-# CHECK-NEXT: [0,9] .DeE-R . . . . . . .. pcmpgtq %xmm2, %xmm2
-# CHECK-NEXT: [0,10] .D=eER . . . . . . .. pcmpgtw %xmm2, %xmm2
-# CHECK-NEXT: [0,11] .D---R . . . . . . .. vpcmpgtb %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,12] . D--R . . . . . . .. vpcmpgtd %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,13] . D--R . . . . . . .. vpcmpgtq %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,14] . D--R . . . . . . .. vpcmpgtw %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,15] . D---R . . . . . . .. vpcmpgtb %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,16] . D---R . . . . . . .. vpcmpgtd %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,17] . D---R . . . . . . .. vpcmpgtq %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,18] . D--R . . . . . . .. vpcmpgtw %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,19] . D--R . . . . . . .. vpcmpgtb %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,20] . D--R . . . . . . .. vpcmpgtd %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,21] . D--R . . . . . . .. vpcmpgtq %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,22] . D--R . . . . . . .. vpcmpgtw %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,23] . D--R . . . . . . .. vpcmpgtb %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,24] . D--R . . . . . . .. vpcmpgtd %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,25] . D--R . . . . . . .. vpcmpgtq %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,26] . D--R . . . . . . .. vpcmpgtw %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,27] . DeER . . . . . . .. psubb %mm2, %mm2
-# CHECK-NEXT: [0,28] . D=eER . . . . . . .. psubd %mm2, %mm2
-# CHECK-NEXT: [0,29] . D==eER. . . . . . .. psubq %mm2, %mm2
-# CHECK-NEXT: [0,30] . D==eER . . . . . .. psubw %mm2, %mm2
-# CHECK-NEXT: [0,31] . DeE--R . . . . . .. psubb %xmm2, %xmm2
-# CHECK-NEXT: [0,32] . DeE--R . . . . . .. psubd %xmm2, %xmm2
-# CHECK-NEXT: [0,33] . DeE--R . . . . . .. psubq %xmm2, %xmm2
-# CHECK-NEXT: [0,34] . D=eE-R . . . . . .. psubw %xmm2, %xmm2
-# CHECK-NEXT: [0,35] . D----R . . . . . .. vpsubb %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,36] . .D---R . . . . . .. vpsubd %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,37] . .D---R . . . . . .. vpsubq %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,38] . .D---R . . . . . .. vpsubw %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,39] . .D----R . . . . . .. vpsubb %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,40] . .D----R . . . . . .. vpsubd %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,41] . .D----R . . . . . .. vpsubq %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,42] . . D---R . . . . . .. vpsubw %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,43] . . D---R . . . . . .. vpsubb %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,44] . . D---R . . . . . .. vpsubd %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,45] . . D---R . . . . . .. vpsubq %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,46] . . D---R . . . . . .. vpsubw %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,47] . . D---R . . . . . .. vpsubb %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,48] . . D---R . . . . . .. vpsubd %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,49] . . D---R . . . . . .. vpsubq %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,50] . . D---R . . . . . .. vpsubw %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,51] . . DeE-R . . . . . .. vpsubb ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/108314
More information about the llvm-commits mailing list