[llvm] [X86] Add RCU for Skylake Models (PR #153832)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 15 09:44:50 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Aiden Grossman (boomanaiden154)
<details>
<summary>Changes</summary>
We cannot actually retire an infinite number of uops per cycle. This patch adds an RCU (RetireControlUnit) to the Skylake scheduling models to fix this. I'm purposefully using a loose upper bound here. We're unlikely to actually get four fused uops per cycle, but this is better than not setting anything. Most realistic code I've put through uiCA will retire up to ~6 uops per cycle.
Information taken from https://en.wikichip.org/wiki/intel/microarchitectures/skylake_(client).
This requires modification of the two zero-idiom tests because we do not currently model the CPU frontend, which would likely be the actual bottleneck in that case.
Related to #153747.
---
Patch is 60.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/153832.diff
4 Files Affected:
- (modified) llvm/lib/Target/X86/X86SchedSkylakeClient.td (+6)
- (modified) llvm/lib/Target/X86/X86SchedSkylakeServer.td (+6)
- (modified) llvm/test/tools/llvm-mca/X86/SkylakeClient/zero-idioms.s (+129-129)
- (modified) llvm/test/tools/llvm-mca/X86/SkylakeServer/zero-idioms.s (+224-224)
``````````diff
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 8cd52e2a8ebc9..f15a7c7076414 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -70,6 +70,12 @@ def SKLPortAny : ProcResGroup<[SKLPort0, SKLPort1, SKLPort2, SKLPort3, SKLPort4,
let BufferSize=60;
}
+// Skylake can retire up to four (potentially fused) uops per cycle. Set the
+// limit to twice that given we do not model fused uops as only taking up one
+// retirement slot. I could not find any documented sources on how many
+// in-flight micro-ops can be tracked.
+def SKRCU : RetireControlUnit<0, 8>;
+
// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 14a51d1ed9ce6..2a793d0205986 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -70,6 +70,12 @@ def SKXPortAny : ProcResGroup<[SKXPort0, SKXPort1, SKXPort2, SKXPort3, SKXPort4,
let BufferSize=60;
}
+// Skylake can retire up to four (potentially fused) uops per cycle. Set the
+// limit to twice that given we do not model fused uops as only taking up one
+// retirement slot. I could not find any documented sources on how many
+// in-flight micro-ops can be tracked.
+def SKXRCU : RetireControlUnit<0, 8>;
+
// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 5>;
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/zero-idioms.s
index 0f19ef26806a9..2dce795839090 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/zero-idioms.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/zero-idioms.s
@@ -105,12 +105,12 @@ vpxor %ymm3, %ymm3, %ymm5
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 83
-# CHECK-NEXT: Total Cycles: 17
+# CHECK-NEXT: Total Cycles: 19
# CHECK-NEXT: Total uOps: 83
# CHECK: Dispatch Width: 6
-# CHECK-NEXT: uOps Per Cycle: 4.88
-# CHECK-NEXT: IPC: 4.88
+# CHECK-NEXT: uOps Per Cycle: 4.37
+# CHECK-NEXT: IPC: 4.37
# CHECK-NEXT: Block RThroughput: 13.8
# CHECK: Instruction Info:
@@ -208,7 +208,7 @@ vpxor %ymm3, %ymm3, %ymm5
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 24
-# CHECK-NEXT: Max number of mappings used: 15
+# CHECK-NEXT: Max number of mappings used: 16
# CHECK: Resources:
# CHECK-NEXT: [0] - SKLDivider
@@ -313,92 +313,92 @@ vpxor %ymm3, %ymm3, %ymm5
# CHECK-NEXT: - - - - - - - - - - vpxor %ymm3, %ymm3, %ymm5
# CHECK: Timeline view:
-# CHECK-NEXT: 0123456
+# CHECK-NEXT: 012345678
# CHECK-NEXT: Index 0123456789
-# CHECK: [0,0] DR . . .. subl %eax, %eax
-# CHECK-NEXT: [0,1] DR . . .. subq %rax, %rax
-# CHECK-NEXT: [0,2] DR . . .. xorl %eax, %eax
-# CHECK-NEXT: [0,3] DR . . .. xorq %rax, %rax
-# CHECK-NEXT: [0,4] DeER . . .. pcmpgtb %mm2, %mm2
-# CHECK-NEXT: [0,5] D=eER. . .. pcmpgtd %mm2, %mm2
-# CHECK-NEXT: [0,6] .D=eER . .. pcmpgtw %mm2, %mm2
-# CHECK-NEXT: [0,7] .D---R . .. pcmpgtb %xmm2, %xmm2
-# CHECK-NEXT: [0,8] .D---R . .. pcmpgtd %xmm2, %xmm2
-# CHECK-NEXT: [0,9] .D---R . .. pcmpgtq %xmm2, %xmm2
-# CHECK-NEXT: [0,10] .D---R . .. pcmpgtw %xmm2, %xmm2
-# CHECK-NEXT: [0,11] .D---R . .. vpcmpgtb %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,12] . D--R . .. vpcmpgtd %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,13] . D--R . .. vpcmpgtq %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,14] . D--R . .. vpcmpgtw %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,15] . D--R . .. vpcmpgtb %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,16] . D--R . .. vpcmpgtd %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,17] . D--R . .. vpcmpgtq %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,18] . D-R . .. vpcmpgtw %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,19] . D-R . .. vpcmpgtb %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,20] . D-R . .. vpcmpgtd %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,21] . D-R . .. vpcmpgtq %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,22] . D-R . .. vpcmpgtw %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,23] . D-R . .. vpcmpgtb %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,24] . DR . .. vpcmpgtd %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,25] . DR . .. vpcmpgtq %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,26] . DR . .. vpcmpgtw %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,27] . DeER . .. psubb %mm2, %mm2
-# CHECK-NEXT: [0,28] . D=eER . .. psubd %mm2, %mm2
-# CHECK-NEXT: [0,29] . D==eER. .. psubq %mm2, %mm2
-# CHECK-NEXT: [0,30] . D==eER .. psubw %mm2, %mm2
-# CHECK-NEXT: [0,31] . D----R .. psubb %xmm2, %xmm2
-# CHECK-NEXT: [0,32] . D----R .. psubd %xmm2, %xmm2
-# CHECK-NEXT: [0,33] . D----R .. psubq %xmm2, %xmm2
-# CHECK-NEXT: [0,34] . D----R .. psubw %xmm2, %xmm2
-# CHECK-NEXT: [0,35] . D----R .. vpsubb %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,36] . .D---R .. vpsubd %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,37] . .D---R .. vpsubq %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,38] . .D---R .. vpsubw %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,39] . .D---R .. vpsubb %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,40] . .D---R .. vpsubd %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,41] . .D---R .. vpsubq %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,42] . . D--R .. vpsubw %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,43] . . D--R .. vpsubb %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,44] . . D--R .. vpsubd %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,45] . . D--R .. vpsubq %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,46] . . D--R .. vpsubw %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,47] . . D--R .. vpsubb %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,48] . . D-R .. vpsubd %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,49] . . D-R .. vpsubq %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,50] . . D-R .. vpsubw %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,51] . . DeER .. andnps %xmm0, %xmm0
-# CHECK-NEXT: [0,52] . . DeER .. andnpd %xmm1, %xmm1
-# CHECK-NEXT: [0,53] . . DeER .. vandnps %xmm2, %xmm2, %xmm2
-# CHECK-NEXT: [0,54] . . DeER .. vandnpd %xmm1, %xmm1, %xmm1
-# CHECK-NEXT: [0,55] . . DeER .. vandnps %ymm2, %ymm2, %ymm2
-# CHECK-NEXT: [0,56] . . D=eER .. vandnpd %ymm1, %ymm1, %ymm1
-# CHECK-NEXT: [0,57] . . DeE-R .. pandn %mm2, %mm2
-# CHECK-NEXT: [0,58] . . D=eER .. pandn %xmm2, %xmm2
-# CHECK-NEXT: [0,59] . . D=eER .. vpandn %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,60] . . D=eER.. vpandn %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,61] . . D=eER.. vandnps %xmm2, %xmm2, %xmm5
-# CHECK-NEXT: [0,62] . . D=eER.. vandnpd %xmm1, %xmm1, %xmm5
-# CHECK-NEXT: [0,63] . . D==eER. vpandn %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,64] . . D==eER. vandnps %ymm2, %ymm2, %ymm5
-# CHECK-NEXT: [0,65] . . D==eER. vandnpd %ymm1, %ymm1, %ymm5
-# CHECK-NEXT: [0,66] . . .D==eER vpandn %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: [0,67] . . .D----R xorps %xmm0, %xmm0
-# CHECK-NEXT: [0,68] . . .D----R xorpd %xmm1, %xmm1
-# CHECK-NEXT: [0,69] . . .D----R vxorps %xmm2, %xmm2, %xmm2
-# CHECK-NEXT: [0,70] . . .D----R vxorpd %xmm1, %xmm1, %xmm1
-# CHECK-NEXT: [0,71] . . .D----R vxorps %ymm2, %ymm2, %ymm2
-# CHECK-NEXT: [0,72] . . . D---R vxorpd %ymm1, %ymm1, %ymm1
-# CHECK-NEXT: [0,73] . . . D=eER pxor %mm2, %mm2
-# CHECK-NEXT: [0,74] . . . D---R pxor %xmm2, %xmm2
-# CHECK-NEXT: [0,75] . . . D---R vpxor %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: [0,76] . . . D---R vpxor %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: [0,77] . . . D---R vxorps %xmm4, %xmm4, %xmm5
-# CHECK-NEXT: [0,78] . . . D--R vxorpd %xmm1, %xmm1, %xmm3
-# CHECK-NEXT: [0,79] . . . D--R vxorps %ymm4, %ymm4, %ymm5
-# CHECK-NEXT: [0,80] . . . D--R vxorpd %ymm1, %ymm1, %ymm3
-# CHECK-NEXT: [0,81] . . . D--R vpxor %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: [0,82] . . . D--R vpxor %ymm3, %ymm3, %ymm5
+# CHECK: [0,0] DR . . . . subl %eax, %eax
+# CHECK-NEXT: [0,1] DR . . . . subq %rax, %rax
+# CHECK-NEXT: [0,2] DR . . . . xorl %eax, %eax
+# CHECK-NEXT: [0,3] DR . . . . xorq %rax, %rax
+# CHECK-NEXT: [0,4] DeER . . . . pcmpgtb %mm2, %mm2
+# CHECK-NEXT: [0,5] D=eER. . . . pcmpgtd %mm2, %mm2
+# CHECK-NEXT: [0,6] .D=eER . . . pcmpgtw %mm2, %mm2
+# CHECK-NEXT: [0,7] .D---R . . . pcmpgtb %xmm2, %xmm2
+# CHECK-NEXT: [0,8] .D---R . . . pcmpgtd %xmm2, %xmm2
+# CHECK-NEXT: [0,9] .D---R . . . pcmpgtq %xmm2, %xmm2
+# CHECK-NEXT: [0,10] .D---R . . . pcmpgtw %xmm2, %xmm2
+# CHECK-NEXT: [0,11] .D---R . . . vpcmpgtb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,12] . D--R . . . vpcmpgtd %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,13] . D--R . . . vpcmpgtq %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,14] . D---R . . . vpcmpgtw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,15] . D---R . . . vpcmpgtb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,16] . D---R . . . vpcmpgtd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,17] . D---R . . . vpcmpgtq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,18] . D--R . . . vpcmpgtw %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,19] . D--R . . . vpcmpgtb %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,20] . D--R . . . vpcmpgtd %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,21] . D--R . . . vpcmpgtq %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,22] . D---R . . . vpcmpgtw %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,23] . D---R . . . vpcmpgtb %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,24] . D--R . . . vpcmpgtd %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,25] . D--R . . . vpcmpgtq %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,26] . D--R . . . vpcmpgtw %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,27] . DeER . . . psubb %mm2, %mm2
+# CHECK-NEXT: [0,28] . D=eER . . . psubd %mm2, %mm2
+# CHECK-NEXT: [0,29] . D==eER. . . psubq %mm2, %mm2
+# CHECK-NEXT: [0,30] . D==eER . . psubw %mm2, %mm2
+# CHECK-NEXT: [0,31] . D----R . . psubb %xmm2, %xmm2
+# CHECK-NEXT: [0,32] . D----R . . psubd %xmm2, %xmm2
+# CHECK-NEXT: [0,33] . D----R . . psubq %xmm2, %xmm2
+# CHECK-NEXT: [0,34] . D----R . . psubw %xmm2, %xmm2
+# CHECK-NEXT: [0,35] . D----R . . vpsubb %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,36] . .D---R . . vpsubd %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,37] . .D---R . . vpsubq %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,38] . .D----R . . vpsubw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,39] . .D----R . . vpsubb %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,40] . .D----R . . vpsubd %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,41] . .D----R . . vpsubq %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,42] . . D---R . . vpsubw %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,43] . . D---R . . vpsubb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,44] . . D---R . . vpsubd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,45] . . D---R . . vpsubq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,46] . . D----R . . vpsubw %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,47] . . D----R . . vpsubb %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,48] . . D---R . . vpsubd %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,49] . . D---R . . vpsubq %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,50] . . D---R . . vpsubw %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,51] . . DeE-R . . andnps %xmm0, %xmm0
+# CHECK-NEXT: [0,52] . . DeE-R . . andnpd %xmm1, %xmm1
+# CHECK-NEXT: [0,53] . . DeE-R . . vandnps %xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,54] . . DeE-R . . vandnpd %xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,55] . . DeE-R . . vandnps %ymm2, %ymm2, %ymm2
+# CHECK-NEXT: [0,56] . . D=eER . . vandnpd %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,57] . . DeE-R . . pandn %mm2, %mm2
+# CHECK-NEXT: [0,58] . . D=eER . . pandn %xmm2, %xmm2
+# CHECK-NEXT: [0,59] . . D=eER . . vpandn %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,60] . . D=eER. . vpandn %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,61] . . D=eER. . vandnps %xmm2, %xmm2, %xmm5
+# CHECK-NEXT: [0,62] . . D=eER. . vandnpd %xmm1, %xmm1, %xmm5
+# CHECK-NEXT: [0,63] . . D==eER . vpandn %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,64] . . D==eER . vandnps %ymm2, %ymm2, %ymm5
+# CHECK-NEXT: [0,65] . . D==eER . vandnpd %ymm1, %ymm1, %ymm5
+# CHECK-NEXT: [0,66] . . .D==eER . vpandn %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: [0,67] . . .D----R . xorps %xmm0, %xmm0
+# CHECK-NEXT: [0,68] . . .D----R . xorpd %xmm1, %xmm1
+# CHECK-NEXT: [0,69] . . .D----R . vxorps %xmm2, %xmm2, %xmm2
+# CHECK-NEXT: [0,70] . . .D----R . vxorpd %xmm1, %xmm1, %xmm1
+# CHECK-NEXT: [0,71] . . .D----R . vxorps %ymm2, %ymm2, %ymm2
+# CHECK-NEXT: [0,72] . . . D---R . vxorpd %ymm1, %ymm1, %ymm1
+# CHECK-NEXT: [0,73] . . . D=eER . pxor %mm2, %mm2
+# CHECK-NEXT: [0,74] . . . D----R. pxor %xmm2, %xmm2
+# CHECK-NEXT: [0,75] . . . D----R. vpxor %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: [0,76] . . . D----R. vpxor %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: [0,77] . . . D----R. vxorps %xmm4, %xmm4, %xmm5
+# CHECK-NEXT: [0,78] . . . D---R. vxorpd %xmm1, %xmm1, %xmm3
+# CHECK-NEXT: [0,79] . . . D---R. vxorps %ymm4, %ymm4, %ymm5
+# CHECK-NEXT: [0,80] . . . D---R. vxorpd %ymm1, %ymm1, %ymm3
+# CHECK-NEXT: [0,81] . . . D---R. vpxor %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: [0,82] . . . D----R vpxor %ymm3, %ymm3, %ymm5
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
@@ -421,19 +421,19 @@ vpxor %ymm3, %ymm3, %ymm5
# CHECK-NEXT: 11. 1 0.0 0.0 3.0 vpcmpgtb %xmm3, %xmm3, %xmm3
# CHECK-NEXT: 12. 1 0.0 0.0 2.0 vpcmpgtd %xmm3, %xmm3, %xmm3
# CHECK-NEXT: 13. 1 0.0 0.0 2.0 vpcmpgtq %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 14. 1 0.0 0.0 2.0 vpcmpgtw %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 15. 1 0.0 0.0 2.0 vpcmpgtb %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 16. 1 0.0 0.0 2.0 vpcmpgtd %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 17. 1 0.0 0.0 2.0 vpcmpgtq %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 18. 1 0.0 0.0 1.0 vpcmpgtw %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 19. 1 0.0 0.0 1.0 vpcmpgtb %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 20. 1 0.0 0.0 1.0 vpcmpgtd %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 21. 1 0.0 0.0 1.0 vpcmpgtq %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 22. 1 0.0 0.0 1.0 vpcmpgtw %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 23. 1 0.0 0.0 1.0 vpcmpgtb %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 24. 1 0.0 0.0 0.0 vpcmpgtd %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 25. 1 0.0 0.0 0.0 vpcmpgtq %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 26. 1 0.0 0.0 0.0 vpcmpgtw %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 14. 1 0.0 0.0 3.0 vpcmpgtw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 15. 1 0.0 0.0 3.0 vpcmpgtb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 16. 1 0.0 0.0 3.0 vpcmpgtd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 17. 1 0.0 0.0 3.0 vpcmpgtq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 18. 1 0.0 0.0 2.0 vpcmpgtw %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 19. 1 0.0 0.0 2.0 vpcmpgtb %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 20. 1 0.0 0.0 2.0 vpcmpgtd %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 21. 1 0.0 0.0 2.0 vpcmpgtq %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 22. 1 0.0 0.0 3.0 vpcmpgtw %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 23. 1 0.0 0.0 3.0 vpcmpgtb %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 24. 1 0.0 0.0 2.0 vpcmpgtd %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 25. 1 0.0 0.0 2.0 vpcmpgtq %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 26. 1 0.0 0.0 2.0 vpcmpgtw %ymm3, %ymm3, %ymm5
# CHECK-NEXT: 27. 1 1.0 1.0 0.0 psubb %mm2, %mm2
# CHECK-NEXT: 28. 1 2.0 0.0 0.0 psubd %mm2, %mm2
# CHECK-NEXT: 29. 1 3.0 0.0 0.0 psubq %mm2, %mm2
@@ -445,24 +445,24 @@ vpxor %ymm3, %ymm3, %ymm5
# CHECK-NEXT: 35. 1 0.0 0.0 4.0 vpsubb %xmm3, %xmm3, %xmm3
# CHECK-NEXT: 36. 1 0.0 0.0 3.0 vpsubd %xmm3, %xmm3, %xmm3
# CHECK-NEXT: 37. 1 0.0 0.0 3.0 vpsubq %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 38. 1 0.0 0.0 3.0 vpsubw %xmm3, %xmm3, %xmm3
-# CHECK-NEXT: 39. 1 0.0 0.0 3.0 vpsubb %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 40. 1 0.0 0.0 3.0 vpsubd %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 41. 1 0.0 0.0 3.0 vpsubq %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 42. 1 0.0 0.0 2.0 vpsubw %ymm3, %ymm3, %ymm3
-# CHECK-NEXT: 43. 1 0.0 0.0 2.0 vpsubb %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 44. 1 0.0 0.0 2.0 vpsubd %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 45. 1 0.0 0.0 2.0 vpsubq %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 46. 1 0.0 0.0 2.0 vpsubw %xmm3, %xmm3, %xmm5
-# CHECK-NEXT: 47. 1 0.0 0.0 2.0 vpsubb %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 48. 1 0.0 0.0 1.0 vpsubd %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 49. 1 0.0 0.0 1.0 vpsubq %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 50. 1 0.0 0.0 1.0 vpsubw %ymm3, %ymm3, %ymm5
-# CHECK-NEXT: 51. 1 1.0 1.0 0.0 andnps %xmm0, %xmm0
-# CHECK-NEXT: 52. 1 1.0 1.0 0.0 andnpd %xmm1, %xmm1
-# CHECK-NEXT: 53. 1 1.0 1.0 0.0 vandnps %xmm2, %xmm2, %xmm2
-# CHECK-NEXT: 54. 1 1.0 0.0 0.0 vandnpd %xmm1, %xmm1, %xmm1
-# CHECK-NEXT: 55. 1 1.0 0.0 0.0 vandnps %ymm2, %ymm2, %ymm2
+# CHECK-NEXT: 38. 1 0.0 0.0 4.0 vpsubw %xmm3, %xmm3, %xmm3
+# CHECK-NEXT: 39. 1 0.0 0.0 4.0 vpsubb %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 40. 1 0.0 0.0 4.0 vpsubd %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 41. 1 0.0 0.0 4.0 vpsubq %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 42. 1 0.0 0.0 3.0 vpsubw %ymm3, %ymm3, %ymm3
+# CHECK-NEXT: 43. 1 0.0 0.0 3.0 vpsubb %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 44. 1 0.0 0.0 3.0 vpsubd %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 45. 1 0.0 0.0 3.0 vpsubq %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 46. 1 0.0 0.0 4.0 vpsubw %xmm3, %xmm3, %xmm5
+# CHECK-NEXT: 47. 1 0.0 0.0 4.0 vpsubb %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 48. 1 0.0 0.0 3.0 vpsubd %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 49. 1 0.0 0.0 3.0 vpsubq %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 50. 1 0.0 0.0 3.0 vpsubw %ymm3, %ymm3, %ymm5
+# CHECK-NEXT: 51. 1 1.0 1.0 1.0 andnps %xmm0, %xmm0
+# CHECK-NEXT: 52. 1 1.0 1.0 1.0 andnpd %xmm1, %xmm1
+# CHECK-NEXT: 53. 1 1.0 1.0 1.0 vandnp...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/153832
More information about the llvm-commits
mailing list