[llvm] r328892 - [X86][BtVer2] Fixed the number of micro opcodes for AVX vector converts and VSQRT instructions.
Andrea Di Biagio via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 30 11:53:47 PDT 2018
Author: adibiagio
Date: Fri Mar 30 11:53:47 2018
New Revision: 328892
URL: http://llvm.org/viewvc/llvm-project?rev=328892&view=rev
Log:
[X86][BtVer2] Fixed the number of micro opcodes for AVX vector converts and
VSQRT instructions.
There were still a few AVX instructions with an incorrect number of micro opcodes.
These should be fixed now.
Modified:
llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=328892&r1=328891&r2=328892&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Fri Mar 30 11:53:47 2018
@@ -700,12 +700,14 @@ def : InstRW<[JWriteFCmpYLd, ReadAfterLd
def JWriteVCVTPDY: SchedWriteRes<[JFPU1, JSTC, JFPX]> {
let Latency = 6;
let ResourceCycles = [2, 2, 4];
+ let NumMicroOps = 3;
}
def : InstRW<[JWriteVCVTPDY], (instrs VCVTPD2DQYrr, VCVTTPD2DQYrr, VCVTPD2PSYrr)>;
def JWriteVCVTPDYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC, JFPX]> {
let Latency = 11;
let ResourceCycles = [2, 2, 2, 4];
+ let NumMicroOps = 3;
}
def : InstRW<[JWriteVCVTPDYLd, ReadAfterLd], (instrs VCVTPD2DQYrm, VCVTTPD2DQYrm, VCVTPD2PSYrm)>;
@@ -757,8 +759,9 @@ def JWriteShuffleY: SchedWriteRes<[JFPU0
}
def : InstRW<[JWriteShuffleY], (instrs VBLENDPDYrri, VBLENDPSYrri,
VMOVDDUPYrr, VMOVSHDUPYrr, VMOVSLDUPYrr,
- VPERMILPDYri, VPERMILPSYri, VSHUFPDYrri, VSHUFPSYrri,
- VUNPCKHPDYrr, VUNPCKHPSYrr, VUNPCKLPDYrr, VUNPCKLPSYrr)>;
+ VPERMILPDYri, VPERMILPSYri, VSHUFPDYrri,
+ VSHUFPSYrri, VUNPCKHPDYrr, VUNPCKHPSYrr,
+ VUNPCKLPDYrr, VUNPCKLPSYrr)>;
def JWriteShuffleYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
@@ -766,16 +769,20 @@ def JWriteShuffleYLd: SchedWriteRes<[JLA
let NumMicroOps = 2;
}
def : InstRW<[JWriteShuffleYLd, ReadAfterLd], (instrs VBLENDPDYrmi, VBLENDPSYrmi,
- VMOVDDUPYrm, VMOVSHDUPYrm, VMOVSLDUPYrm,
- VPERMILPDYmi, VPERMILPSYmi, VSHUFPDYrmi, VSHUFPSYrmi,
- VUNPCKHPDYrm, VUNPCKHPSYrm, VUNPCKLPDYrm, VUNPCKLPSYrm)>;
+ VMOVDDUPYrm, VMOVSHDUPYrm,
+ VMOVSLDUPYrm, VPERMILPDYmi,
+ VPERMILPSYmi, VSHUFPDYrmi,
+ VSHUFPSYrmi, VUNPCKHPDYrm,
+ VUNPCKHPSYrm, VUNPCKLPDYrm,
+ VUNPCKLPSYrm)>;
def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
let ResourceCycles = [1, 2, 4];
let NumMicroOps = 2;
}
-def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm, VBROADCASTSSYrm)>;
+def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
+ VBROADCASTSSYrm)>;
def JWriteVMaskMovLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
@@ -844,14 +851,16 @@ def : InstRW<[JWriteVSQRTPDLd], (instrs
SQRTSDm_Int, VSQRTSDm_Int)>;
def JWriteVSQRTYPD: SchedWriteRes<[JFPU1, JFPM]> {
- let Latency = 54;
+ let Latency = 54; // each uOp is 27cy.
let ResourceCycles = [2, 54];
+ let NumMicroOps = 2;
}
def : InstRW<[JWriteVSQRTYPD], (instrs VSQRTPDYr)>;
def JWriteVSQRTYPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
- let Latency = 59;
+ let Latency = 59; // each uOp is 27cy (+5cy of memory load).
let ResourceCycles = [2, 2, 54];
+ let NumMicroOps = 2;
}
def : InstRW<[JWriteVSQRTYPDLd], (instrs VSQRTPDYm)>;
Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s?rev=328892&r1=328891&r2=328892&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s Fri Mar 30 11:53:47 2018
@@ -1112,12 +1112,12 @@ vzeroupper
# CHECK-NEXT: 2 8 2.00 * vcvtdq2ps (%rax), %ymm2
# CHECK-NEXT: 1 3 1.00 vcvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * vcvtpd2dqx (%rax), %xmm2
-# CHECK-NEXT: 1 6 2.00 vcvtpd2dq %ymm0, %xmm2
-# CHECK-NEXT: 1 11 2.00 * vcvtpd2dqy (%rax), %xmm2
+# CHECK-NEXT: 3 6 2.00 vcvtpd2dq %ymm0, %xmm2
+# CHECK-NEXT: 3 11 2.00 * vcvtpd2dqy (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * vcvtpd2psx (%rax), %xmm2
-# CHECK-NEXT: 1 6 2.00 vcvtpd2ps %ymm0, %xmm2
-# CHECK-NEXT: 1 11 2.00 * vcvtpd2psy (%rax), %xmm2
+# CHECK-NEXT: 3 6 2.00 vcvtpd2ps %ymm0, %xmm2
+# CHECK-NEXT: 3 11 2.00 * vcvtpd2psy (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * vcvtps2dq (%rax), %xmm2
# CHECK-NEXT: 2 3 2.00 vcvtps2dq %ymm0, %ymm2
@@ -1148,8 +1148,8 @@ vzeroupper
# CHECK-NEXT: 2 12 1.00 * vcvtss2si (%rax), %rcx
# CHECK-NEXT: 1 3 1.00 vcvttpd2dq %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * vcvttpd2dqx (%rax), %xmm2
-# CHECK-NEXT: 1 6 2.00 vcvttpd2dq %ymm0, %xmm2
-# CHECK-NEXT: 1 11 2.00 * vcvttpd2dqy (%rax), %xmm2
+# CHECK-NEXT: 3 6 2.00 vcvttpd2dq %ymm0, %xmm2
+# CHECK-NEXT: 3 11 2.00 * vcvttpd2dqy (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 1 8 1.00 * vcvttps2dq (%rax), %xmm2
# CHECK-NEXT: 2 3 2.00 vcvttps2dq %ymm0, %ymm2
@@ -1640,8 +1640,8 @@ vzeroupper
# CHECK-NEXT: 2 6 2.00 * vshufps $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 27 27.00 vsqrtpd %xmm0, %xmm2
# CHECK-NEXT: 1 32 27.00 * vsqrtpd (%rax), %xmm2
-# CHECK-NEXT: 1 54 54.00 vsqrtpd %ymm0, %ymm2
-# CHECK-NEXT: 1 59 54.00 * vsqrtpd (%rax), %ymm2
+# CHECK-NEXT: 2 54 54.00 vsqrtpd %ymm0, %ymm2
+# CHECK-NEXT: 2 59 54.00 * vsqrtpd (%rax), %ymm2
# CHECK-NEXT: 1 21 21.00 vsqrtps %xmm0, %xmm2
# CHECK-NEXT: 1 26 21.00 * vsqrtps (%rax), %xmm2
# CHECK-NEXT: 2 42 42.00 vsqrtps %ymm0, %ymm2
More information about the llvm-commits mailing list