[llvm] r328698 - [X86][BtVer2] Fix the number of micro opcodes for AES[ENC|DEC] and other YMM instructions.
Andrea Di Biagio via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 28 05:12:04 PDT 2018
Author: adibiagio
Date: Wed Mar 28 05:12:04 2018
New Revision: 328698
URL: http://llvm.org/viewvc/llvm-project?rev=328698&view=rev
Log:
[X86][BtVer2] Fix the number of micro opcodes for AES[ENC|DEC] and other YMM instructions.
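Similar to r328694. The number of micro opcodes should be 2, not 1, for those
instructions: AESENC/AESDEC (including the *LAST variants) and the YMM forms of
VBROADCASTSS/SD (load), VCMPPS/PD, and VMAXPS/PD / VMINPS/PD.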
This was found when testing AVX code for BtVer2 using llvm-mca.
Modified:
llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=328698&r1=328697&r2=328698&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Wed Mar 28 05:12:04 2018
@@ -372,7 +372,7 @@ def : WriteRes<WriteMMXMOVMSK, [JFPU0,
defm : JWriteResFpuPair<WriteAESIMC, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteAESKeyGen, [JFPU0, JVIMUL], 2>;
-defm : JWriteResFpuPair<WriteAESDecEnc, [JFPU0, JVIMUL], 3>;
+defm : JWriteResFpuPair<WriteAESDecEnc, [JFPU0, JVIMUL], 3, [1], 2>;
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
@@ -748,18 +748,21 @@ def : InstRW<[JWriteShuffleYLd, ReadAfte
def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
let ResourceCycles = [1, 2, 4];
+ let NumMicroOps = 2;
}
def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm, VBROADCASTSSYrm)>;
def JWriteFPAY22: SchedWriteRes<[JFPU0, JFPA]> {
let Latency = 2;
let ResourceCycles = [2, 2];
+ let NumMicroOps = 2;
}
def : InstRW<[JWriteFPAY22], (instregex "VCMPP(S|D)Yrri", "VM(AX|IN)P(D|S)Yrr")>;
def JWriteFPAY22Ld: SchedWriteRes<[JLAGU, JFPU0, JFPA]> {
let Latency = 7;
let ResourceCycles = [2, 2, 2];
+ let NumMicroOps = 2;
}
def : InstRW<[JWriteFPAY22Ld, ReadAfterLd], (instregex "VCMPP(S|D)Yrmi", "VM(AX|IN)P(D|S)Yrm")>;
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s?rev=328698&r1=328697&r2=328698&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s Wed Mar 28 05:12:04 2018
@@ -1038,14 +1038,14 @@ vzeroupper
# CHECK-NEXT: 1 8 1.00 * vaddsubps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 3 2.00 vaddsubps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 8 2.00 * vaddsubps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 3 1.00 vaesdec %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 1 8 1.00 * vaesdec (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 3 1.00 vaesdeclast %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 1 8 1.00 * vaesdeclast (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 3 1.00 vaesenc %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 1 8 1.00 * vaesenc (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 3 1.00 vaesenclast %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 1 8 1.00 * vaesenclast (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 3 1.00 vaesdec %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 8 1.00 * vaesdec (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 3 1.00 vaesdeclast %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 8 1.00 * vaesdeclast (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 3 1.00 vaesenc %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 8 1.00 * vaesenc (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 3 1.00 vaesenclast %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 8 1.00 * vaesenclast (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vaesimc %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * vaesimc (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 vaeskeygenassist $22, %xmm0, %xmm2
@@ -1083,17 +1083,17 @@ vzeroupper
# CHECK-NEXT: 6 3 3.00 vblendvps %ymm3, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 6 8 3.00 * vblendvps %ymm3, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 6 1.00 * vbroadcastf128 (%rax), %ymm2
-# CHECK-NEXT: 1 6 2.00 * vbroadcastsd (%rax), %ymm2
+# CHECK-NEXT: 2 6 2.00 * vbroadcastsd (%rax), %ymm2
# CHECK-NEXT: 1 6 1.00 * vbroadcastss (%rax), %xmm2
-# CHECK-NEXT: 1 6 2.00 * vbroadcastss (%rax), %ymm2
+# CHECK-NEXT: 2 6 2.00 * vbroadcastss (%rax), %ymm2
# CHECK-NEXT: 1 2 1.00 vcmppd $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vcmppd $0, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 2 2.00 vcmppd $0, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 7 2.00 * vcmppd $0, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 2 2.00 vcmppd $0, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 7 2.00 * vcmppd $0, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vcmpps $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vcmpps $0, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 2 2.00 vcmpps $0, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 7 2.00 * vcmpps $0, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 2 2.00 vcmpps $0, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 7 2.00 * vcmpps $0, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vcmpsd $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vcmpsd $0, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vcmpss $0, %xmm0, %xmm1, %xmm2
@@ -1218,24 +1218,24 @@ vzeroupper
# CHECK-NEXT: 2 6 2.00 * * vmaskmovps %ymm0, %ymm1, (%rax)
# CHECK-NEXT: 1 2 1.00 vmaxpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vmaxpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 2 2.00 vmaxpd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 7 2.00 * vmaxpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 2 2.00 vmaxpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 7 2.00 * vmaxpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vmaxps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vmaxps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 2 2.00 vmaxps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 7 2.00 * vmaxps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 2 2.00 vmaxps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 7 2.00 * vmaxps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vmaxsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vmaxsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vmaxss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vmaxss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vminpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vminpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 2 2.00 vminpd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 7 2.00 * vminpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 2 2.00 vminpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 7 2.00 * vminpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vminps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vminps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 2 2.00 vminps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 7 2.00 * vminps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 2 2.00 vminps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 7 2.00 * vminps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vminsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vminsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vminss %xmm0, %xmm1, %xmm2
More information about the llvm-commits mailing list