[llvm] [llvm-mca][x86] Ensure avxvnni tests actually test the avxvnni instructions (PR #157892)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 10 09:33:36 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
Noticed while checking #<!-- -->97271: we weren't actually testing the VEX-encoded variants of the VNNI instructions in the avxvnni llvm-mca tests.
Fixing this breaks the znver4 results, because the AVX and AVX512 variants did not use consistent instruction naming, so the instruction regex matching in the znver4 scheduling model no longer covered them.
So add the missing register-operand suffix to the AVX512 VNNI instruction names (r -> rr, m -> rm, mb -> rmb) so that they match the AVX VNNI naming.
---
Patch is 68.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/157892.diff
9 Files Affected:
- (modified) llvm/lib/Target/X86/X86InstrAVX512.td (+9-9)
- (modified) llvm/lib/Target/X86/X86InstrInfo.cpp (+84-84)
- (modified) llvm/lib/Target/X86/X86ScheduleZnver4.td (+1-1)
- (modified) llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxvnni.s (+48-48)
- (modified) llvm/test/tools/llvm-mca/X86/Generic/resources-avxvnni.s (+48-48)
- (modified) llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avxvnni.s (+48-48)
- (modified) llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-avxvnni.s (+48-48)
- (modified) llvm/test/tools/llvm-mca/X86/SapphireRapids/resources-avxvnni.s (+48-48)
- (modified) llvm/test/tools/llvm-mca/X86/Znver4/resources-avxvnni.s (+48-48)
``````````diff
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 3401f6f04800e..b8f299965faa3 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -12404,14 +12404,14 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
bit IsCommutable> {
let ExeDomain = VTI.ExeDomain in {
- defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
+ defm rr : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1,
VTI.RC:$src2, VTI.RC:$src3)),
IsCommutable, IsCommutable>,
EVEX, VVVV, T8, Sched<[sched]>;
- defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
+ defm rm : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
@@ -12419,7 +12419,7 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
EVEX, VVVV, EVEX_CD8<32, CD8VF>, T8,
Sched<[sched.Folded, sched.ReadAfterFold,
sched.ReadAfterFold]>;
- defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
+ defm rmb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
"$src2, ${src3}"#VTI.BroadcastStr,
@@ -12459,24 +12459,24 @@ defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul
let Predicates = [HasVNNI] in {
def : Pat<(v16i32 (add VR512:$src1,
(X86vpmaddwd_su VR512:$src2, VR512:$src3))),
- (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
+ (VPDPWSSDZrr VR512:$src1, VR512:$src2, VR512:$src3)>;
def : Pat<(v16i32 (add VR512:$src1,
(X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
- (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
+ (VPDPWSSDZrm VR512:$src1, VR512:$src2, addr:$src3)>;
}
let Predicates = [HasVNNI,HasVLX] in {
def : Pat<(v8i32 (add VR256X:$src1,
(X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
- (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
+ (VPDPWSSDZ256rr VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
def : Pat<(v8i32 (add VR256X:$src1,
(X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
- (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
+ (VPDPWSSDZ256rm VR256X:$src1, VR256X:$src2, addr:$src3)>;
def : Pat<(v4i32 (add VR128X:$src1,
(X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
- (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
+ (VPDPWSSDZ128rr VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
def : Pat<(v4i32 (add VR128X:$src1,
(X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
- (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
+ (VPDPWSSDZ128rm VR128X:$src1, VR128X:$src2, addr:$src3)>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index a68edf4d2b7ee..1f6915929646a 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -2939,78 +2939,78 @@ bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
case X86::VPDPBUUDSYrr:
case X86::VPDPBUUDrr:
case X86::VPDPBUUDYrr:
- case X86::VPDPBSSDSZ128r:
- case X86::VPDPBSSDSZ128rk:
- case X86::VPDPBSSDSZ128rkz:
- case X86::VPDPBSSDSZ256r:
- case X86::VPDPBSSDSZ256rk:
- case X86::VPDPBSSDSZ256rkz:
- case X86::VPDPBSSDSZr:
- case X86::VPDPBSSDSZrk:
- case X86::VPDPBSSDSZrkz:
- case X86::VPDPBSSDZ128r:
- case X86::VPDPBSSDZ128rk:
- case X86::VPDPBSSDZ128rkz:
- case X86::VPDPBSSDZ256r:
- case X86::VPDPBSSDZ256rk:
- case X86::VPDPBSSDZ256rkz:
- case X86::VPDPBSSDZr:
- case X86::VPDPBSSDZrk:
- case X86::VPDPBSSDZrkz:
- case X86::VPDPBUUDSZ128r:
- case X86::VPDPBUUDSZ128rk:
- case X86::VPDPBUUDSZ128rkz:
- case X86::VPDPBUUDSZ256r:
- case X86::VPDPBUUDSZ256rk:
- case X86::VPDPBUUDSZ256rkz:
- case X86::VPDPBUUDSZr:
- case X86::VPDPBUUDSZrk:
- case X86::VPDPBUUDSZrkz:
- case X86::VPDPBUUDZ128r:
- case X86::VPDPBUUDZ128rk:
- case X86::VPDPBUUDZ128rkz:
- case X86::VPDPBUUDZ256r:
- case X86::VPDPBUUDZ256rk:
- case X86::VPDPBUUDZ256rkz:
- case X86::VPDPBUUDZr:
- case X86::VPDPBUUDZrk:
- case X86::VPDPBUUDZrkz:
- case X86::VPDPWSSDZ128r:
- case X86::VPDPWSSDZ128rk:
- case X86::VPDPWSSDZ128rkz:
- case X86::VPDPWSSDZ256r:
- case X86::VPDPWSSDZ256rk:
- case X86::VPDPWSSDZ256rkz:
- case X86::VPDPWSSDZr:
- case X86::VPDPWSSDZrk:
- case X86::VPDPWSSDZrkz:
- case X86::VPDPWSSDSZ128r:
- case X86::VPDPWSSDSZ128rk:
- case X86::VPDPWSSDSZ128rkz:
- case X86::VPDPWSSDSZ256r:
- case X86::VPDPWSSDSZ256rk:
- case X86::VPDPWSSDSZ256rkz:
- case X86::VPDPWSSDSZr:
- case X86::VPDPWSSDSZrk:
- case X86::VPDPWSSDSZrkz:
- case X86::VPDPWUUDZ128r:
- case X86::VPDPWUUDZ128rk:
- case X86::VPDPWUUDZ128rkz:
- case X86::VPDPWUUDZ256r:
- case X86::VPDPWUUDZ256rk:
- case X86::VPDPWUUDZ256rkz:
- case X86::VPDPWUUDZr:
- case X86::VPDPWUUDZrk:
- case X86::VPDPWUUDZrkz:
- case X86::VPDPWUUDSZ128r:
- case X86::VPDPWUUDSZ128rk:
- case X86::VPDPWUUDSZ128rkz:
- case X86::VPDPWUUDSZ256r:
- case X86::VPDPWUUDSZ256rk:
- case X86::VPDPWUUDSZ256rkz:
- case X86::VPDPWUUDSZr:
- case X86::VPDPWUUDSZrk:
- case X86::VPDPWUUDSZrkz:
+ case X86::VPDPBSSDSZ128rr:
+ case X86::VPDPBSSDSZ128rrk:
+ case X86::VPDPBSSDSZ128rrkz:
+ case X86::VPDPBSSDSZ256rr:
+ case X86::VPDPBSSDSZ256rrk:
+ case X86::VPDPBSSDSZ256rrkz:
+ case X86::VPDPBSSDSZrr:
+ case X86::VPDPBSSDSZrrk:
+ case X86::VPDPBSSDSZrrkz:
+ case X86::VPDPBSSDZ128rr:
+ case X86::VPDPBSSDZ128rrk:
+ case X86::VPDPBSSDZ128rrkz:
+ case X86::VPDPBSSDZ256rr:
+ case X86::VPDPBSSDZ256rrk:
+ case X86::VPDPBSSDZ256rrkz:
+ case X86::VPDPBSSDZrr:
+ case X86::VPDPBSSDZrrk:
+ case X86::VPDPBSSDZrrkz:
+ case X86::VPDPBUUDSZ128rr:
+ case X86::VPDPBUUDSZ128rrk:
+ case X86::VPDPBUUDSZ128rrkz:
+ case X86::VPDPBUUDSZ256rr:
+ case X86::VPDPBUUDSZ256rrk:
+ case X86::VPDPBUUDSZ256rrkz:
+ case X86::VPDPBUUDSZrr:
+ case X86::VPDPBUUDSZrrk:
+ case X86::VPDPBUUDSZrrkz:
+ case X86::VPDPBUUDZ128rr:
+ case X86::VPDPBUUDZ128rrk:
+ case X86::VPDPBUUDZ128rrkz:
+ case X86::VPDPBUUDZ256rr:
+ case X86::VPDPBUUDZ256rrk:
+ case X86::VPDPBUUDZ256rrkz:
+ case X86::VPDPBUUDZrr:
+ case X86::VPDPBUUDZrrk:
+ case X86::VPDPBUUDZrrkz:
+ case X86::VPDPWSSDZ128rr:
+ case X86::VPDPWSSDZ128rrk:
+ case X86::VPDPWSSDZ128rrkz:
+ case X86::VPDPWSSDZ256rr:
+ case X86::VPDPWSSDZ256rrk:
+ case X86::VPDPWSSDZ256rrkz:
+ case X86::VPDPWSSDZrr:
+ case X86::VPDPWSSDZrrk:
+ case X86::VPDPWSSDZrrkz:
+ case X86::VPDPWSSDSZ128rr:
+ case X86::VPDPWSSDSZ128rrk:
+ case X86::VPDPWSSDSZ128rrkz:
+ case X86::VPDPWSSDSZ256rr:
+ case X86::VPDPWSSDSZ256rrk:
+ case X86::VPDPWSSDSZ256rrkz:
+ case X86::VPDPWSSDSZrr:
+ case X86::VPDPWSSDSZrrk:
+ case X86::VPDPWSSDSZrrkz:
+ case X86::VPDPWUUDZ128rr:
+ case X86::VPDPWUUDZ128rrk:
+ case X86::VPDPWUUDZ128rrkz:
+ case X86::VPDPWUUDZ256rr:
+ case X86::VPDPWUUDZ256rrk:
+ case X86::VPDPWUUDZ256rrkz:
+ case X86::VPDPWUUDZrr:
+ case X86::VPDPWUUDZrrk:
+ case X86::VPDPWUUDZrrkz:
+ case X86::VPDPWUUDSZ128rr:
+ case X86::VPDPWUUDSZ128rrk:
+ case X86::VPDPWUUDSZ128rrkz:
+ case X86::VPDPWUUDSZ256rr:
+ case X86::VPDPWUUDSZ256rrk:
+ case X86::VPDPWUUDSZ256rrkz:
+ case X86::VPDPWUUDSZrr:
+ case X86::VPDPWUUDSZrrk:
+ case X86::VPDPWUUDSZrrkz:
case X86::VPMADD52HUQrr:
case X86::VPMADD52HUQYrr:
case X86::VPMADD52HUQZ128r:
@@ -10822,12 +10822,12 @@ bool X86InstrInfo::getMachineCombinerPatterns(
}
break;
}
- case X86::VPDPWSSDZ128r:
- case X86::VPDPWSSDZ128m:
- case X86::VPDPWSSDZ256r:
- case X86::VPDPWSSDZ256m:
- case X86::VPDPWSSDZr:
- case X86::VPDPWSSDZm: {
+ case X86::VPDPWSSDZ128rr:
+ case X86::VPDPWSSDZ128rm:
+ case X86::VPDPWSSDZ256rr:
+ case X86::VPDPWSSDZ256rm:
+ case X86::VPDPWSSDZrr:
+ case X86::VPDPWSSDZrm: {
if (Subtarget.hasBWI() && !Subtarget.hasFastDPWSSD()) {
Patterns.push_back(X86MachineCombinerPattern::DPWSSD);
return true;
@@ -10866,11 +10866,11 @@ genAlternativeDpCodeSequence(MachineInstr &Root, const TargetInstrInfo &TII,
MaddOpc = X86::VPMADDWDrm;
AddOpc = X86::VPADDDrr;
break;
- case X86::VPDPWSSDZ128r:
+ case X86::VPDPWSSDZ128rr:
MaddOpc = X86::VPMADDWDZ128rr;
AddOpc = X86::VPADDDZ128rr;
break;
- case X86::VPDPWSSDZ128m:
+ case X86::VPDPWSSDZ128rm:
MaddOpc = X86::VPMADDWDZ128rm;
AddOpc = X86::VPADDDZ128rr;
break;
@@ -10886,11 +10886,11 @@ genAlternativeDpCodeSequence(MachineInstr &Root, const TargetInstrInfo &TII,
MaddOpc = X86::VPMADDWDYrm;
AddOpc = X86::VPADDDYrr;
break;
- case X86::VPDPWSSDZ256r:
+ case X86::VPDPWSSDZ256rr:
MaddOpc = X86::VPMADDWDZ256rr;
AddOpc = X86::VPADDDZ256rr;
break;
- case X86::VPDPWSSDZ256m:
+ case X86::VPDPWSSDZ256rm:
MaddOpc = X86::VPMADDWDZ256rm;
AddOpc = X86::VPADDDZ256rr;
break;
@@ -10898,11 +10898,11 @@ genAlternativeDpCodeSequence(MachineInstr &Root, const TargetInstrInfo &TII,
// -->
// vpmaddwd zmm3,zmm3,zmm1
// vpaddd zmm2,zmm2,zmm3
- case X86::VPDPWSSDZr:
+ case X86::VPDPWSSDZrr:
MaddOpc = X86::VPMADDWDZrr;
AddOpc = X86::VPADDDZrr;
break;
- case X86::VPDPWSSDZm:
+ case X86::VPDPWSSDZrm:
MaddOpc = X86::VPMADDWDZrm;
AddOpc = X86::VPADDDZrr;
break;
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index a93c7e3a82f17..cc300548a50e6 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -1567,7 +1567,7 @@ def Zn4WriteBUSDr_VPMADDr: SchedWriteRes<[Zn4FPFMisc01]> {
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteBUSDr_VPMADDr], (instregex
- "VPDP(BU|WS)(S|P)(S|D|DS)(Z|Z128|Z256)(r|rk|rkz)",
+ "VPDP(BU|WS)(S|P)(S|D|DS)(Z?|Z128?|Z256?|Y?)r(r|rk|rkz)",
"VPMADD52(H|L)UQ(Z|Z128|Z256)(r|rk|rkz)"
)>;
diff --git a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxvnni.s b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxvnni.s
index 8152d18f56c30..4b73a7fc0e8b8 100644
--- a/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxvnni.s
+++ b/llvm/test/tools/llvm-mca/X86/AlderlakeP/resources-avxvnni.s
@@ -1,29 +1,29 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=alderlake -instruction-tables < %s | FileCheck %s
-vpdpbusd %xmm0, %xmm1, %xmm2
-vpdpbusd (%rax), %xmm1, %xmm2
+{vex} vpdpbusd %xmm0, %xmm1, %xmm2
+{vex} vpdpbusd (%rax), %xmm1, %xmm2
-vpdpbusd %ymm0, %ymm1, %ymm2
-vpdpbusd (%rax), %ymm1, %ymm2
+{vex} vpdpbusd %ymm0, %ymm1, %ymm2
+{vex} vpdpbusd (%rax), %ymm1, %ymm2
-vpdpbusds %xmm0, %xmm1, %xmm2
-vpdpbusds (%rax), %xmm1, %xmm2
+{vex} vpdpbusds %xmm0, %xmm1, %xmm2
+{vex} vpdpbusds (%rax), %xmm1, %xmm2
-vpdpbusds %ymm0, %ymm1, %ymm2
-vpdpbusds (%rax), %ymm1, %ymm2
+{vex} vpdpbusds %ymm0, %ymm1, %ymm2
+{vex} vpdpbusds (%rax), %ymm1, %ymm2
-vpdpwssd %xmm0, %xmm1, %xmm2
-vpdpwssd (%rax), %xmm1, %xmm2
+{vex} vpdpwssd %xmm0, %xmm1, %xmm2
+{vex} vpdpwssd (%rax), %xmm1, %xmm2
-vpdpwssd %ymm0, %ymm1, %ymm2
-vpdpwssd (%rax), %ymm1, %ymm2
+{vex} vpdpwssd %ymm0, %ymm1, %ymm2
+{vex} vpdpwssd (%rax), %ymm1, %ymm2
-vpdpwssds %xmm0, %xmm1, %xmm2
-vpdpwssds (%rax), %xmm1, %xmm2
+{vex} vpdpwssds %xmm0, %xmm1, %xmm2
+{vex} vpdpwssds (%rax), %xmm1, %xmm2
-vpdpwssds %ymm0, %ymm1, %ymm2
-vpdpwssds (%rax), %ymm1, %ymm2
+{vex} vpdpwssds %ymm0, %ymm1, %ymm2
+{vex} vpdpwssds (%rax), %ymm1, %ymm2
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
@@ -34,22 +34,22 @@ vpdpwssds (%rax), %ymm1, %ymm2
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 5 0.50 vpdpbusd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 2 13 0.50 * vpdpbusd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 5 0.50 vpdpbusd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 13 0.50 * vpdpbusd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 5 0.50 vpdpbusds %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 2 13 0.50 * vpdpbusds (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 5 0.50 vpdpbusds %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 13 0.50 * vpdpbusds (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 5 0.50 vpdpwssd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 2 13 0.50 * vpdpwssd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 5 0.50 vpdpwssd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 13 0.50 * vpdpwssd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 5 0.50 vpdpwssds %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 2 13 0.50 * vpdpwssds (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 5 0.50 vpdpwssds %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 13 0.50 * vpdpwssds (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 {vex} vpdpbusd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 13 0.50 * {vex} vpdpbusd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 {vex} vpdpbusd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 13 0.50 * {vex} vpdpbusd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 {vex} vpdpbusds %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 13 0.50 * {vex} vpdpbusds (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 {vex} vpdpbusds %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 13 0.50 * {vex} vpdpbusds (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 {vex} vpdpwssd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 13 0.50 * {vex} vpdpwssd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 {vex} vpdpwssd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 13 0.50 * {vex} vpdpwssd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 5 0.50 {vex} vpdpwssds %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 13 0.50 * {vex} vpdpwssds (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 5 0.50 {vex} vpdpwssds %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 13 0.50 * {vex} vpdpwssds (%rax), %ymm1, %ymm2
# CHECK: Resources:
# CHECK-NEXT: [0] - ADLPPort00
@@ -72,19 +72,19 @@ vpdpwssds (%rax), %ymm1, %ymm2
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusds %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpbusds %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpbusds (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssds %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - vpdpwssds %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - vpdpwssds (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - {vex} vpdpbusd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - {vex} vpdpbusd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - {vex} vpdpbusd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - {vex} vpdpbusd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - {vex} vpdpbusds %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - {vex} vpdpbusds (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - {vex} vpdpbusds %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - {vex} vpdpbusds (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - {vex} vpdpwssd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - {vex} vpdpwssd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - {vex} vpdpwssd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - {vex} vpdpwssd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - {vex} vpdpwssds %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - 0.33 - - {vex} vpdpwssds (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - {vex} vpdpwssds %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50 0.50 0.33 0.33 - - - - - - ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/157892
More information about the llvm-commits mailing list