[llvm] c6bdd8e - [X86] Improve the gather scheduler models for SkylakeClient and SkylakeServer
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 5 13:27:06 PST 2020
Author: Craig Topper
Date: 2020-02-05T13:26:47-08:00
New Revision: c6bdd8e73110e14dc54833137cecef9c07d2dc24
URL: https://github.com/llvm/llvm-project/commit/c6bdd8e73110e14dc54833137cecef9c07d2dc24
DIFF: https://github.com/llvm/llvm-project/commit/c6bdd8e73110e14dc54833137cecef9c07d2dc24.diff
LOG: [X86] Improve the gather scheduler models for SkylakeClient and SkylakeServer
The load ports need a cycle for each potentially loaded element, just like Haswell and Broadwell. Unlike Haswell and Broadwell, the number of uops does not scale with the number of elements. Instead, the load uops run for multiple cycles.
I've taken the latency numbers from uops.info. The port bindings for the non-load uops are taken from the original IACA data I have.
Differential Revision: https://reviews.llvm.org/D74000
Added:
Modified:
llvm/lib/Target/X86/X86SchedSkylakeClient.td
llvm/lib/Target/X86/X86SchedSkylakeServer.td
llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s
llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s
llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx2.s
llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s
llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 4dc3b199b601..0950203801af 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -1593,33 +1593,31 @@ def SKLWriteResGroup196 : SchedWriteRes<[SKLPort0,SKLPort23]> {
}
def: InstRW<[SKLWriteResGroup196], (instregex "DIV_F(32|64)m")>;
-def SKLWriteResGroup196_1 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> {
- let Latency = 22;
- let NumMicroOps = 5;
+def SKLWriteResGroupVEX2 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> {
+ let Latency = 18;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
let ResourceCycles = [1,2,1,1];
}
-def: InstRW<[SKLWriteResGroup196_1], (instrs VGATHERDPSrm,
- VGATHERDPDrm,
- VGATHERQPDrm,
- VGATHERQPSrm,
- VPGATHERDDrm,
- VPGATHERDQrm,
- VPGATHERQDrm,
- VPGATHERQQrm)>;
+def: InstRW<[SKLWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm,
+ VGATHERQPDrm, VPGATHERQQrm,
+ VGATHERQPSrm, VPGATHERQDrm)>;
-def SKLWriteResGroup196_2 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> {
- let Latency = 25;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
+def SKLWriteResGroupVEX4 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> {
+ let Latency = 20;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,4,1,1];
+}
+def: InstRW<[SKLWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
+ VGATHERDPSrm, VPGATHERDDrm,
+ VGATHERQPDYrm, VPGATHERQQYrm,
+ VGATHERQPSYrm, VPGATHERQDYrm)>;
+
+def SKLWriteResGroupVEX8 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> {
+ let Latency = 22;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,8,1,1];
}
-def: InstRW<[SKLWriteResGroup196_2], (instrs VGATHERDPSYrm,
- VGATHERQPDYrm,
- VGATHERQPSYrm,
- VPGATHERDDYrm,
- VPGATHERDQYrm,
- VPGATHERQDYrm,
- VPGATHERQQYrm,
- VGATHERDPDYrm)>;
+def: InstRW<[SKLWriteResGroupVEX8], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;
def SKLWriteResGroup198 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort5,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
let Latency = 23;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 3fdc8dda1d06..76c8f0dbac3d 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -2145,14 +2145,6 @@ def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort015]> {
def: InstRW<[SKXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)",
"VPMULLQZrm(b?)")>;
-def SKXWriteResGroup214 : SchedWriteRes<[]> {
- let Latency = 20;
- let NumMicroOps = 0;
-}
-def: InstRW<[SKXWriteResGroup214], (instrs VGATHERDPSZ128rm,
- VGATHERQPSZrm,
- VPGATHERDDZ128rm)>;
-
def SKXWriteResGroup215 : SchedWriteRes<[SKXPort0]> {
let Latency = 20;
let NumMicroOps = 1;
@@ -2167,15 +2159,41 @@ def SKXWriteResGroup216 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
}
def : SchedAlias<WriteFDiv64XLd, SKXWriteResGroup216>; // TODO - convert to ZnWriteResFpuPair
-def SKXWriteResGroup218 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
- let Latency = 20;
- let NumMicroOps = 5;
+def SKXWriteGatherEVEX2 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
+ let Latency = 17;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
let ResourceCycles = [1,2,1,1];
}
-def: InstRW<[SKXWriteResGroup218], (instrs VGATHERQPSZ128rm,
- VGATHERQPSZ256rm,
- VPGATHERQDZ128rm,
- VPGATHERQDZ256rm)>;
+def: InstRW<[SKXWriteGatherEVEX2], (instrs VGATHERQPSZ128rm, VPGATHERQDZ128rm,
+ VGATHERDPDZ128rm, VPGATHERDQZ128rm,
+ VGATHERQPDZ128rm, VPGATHERQQZ128rm)>;
+
+def SKXWriteGatherEVEX4 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
+ let Latency = 19;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,4,1,1];
+}
+def: InstRW<[SKXWriteGatherEVEX4], (instrs VGATHERQPSZ256rm, VPGATHERQDZ256rm,
+ VGATHERQPDZ256rm, VPGATHERQQZ256rm,
+ VGATHERDPSZ128rm, VPGATHERDDZ128rm,
+ VGATHERDPDZ256rm, VPGATHERDQZ256rm)>;
+
+def SKXWriteGatherEVEX8 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
+ let Latency = 21;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,8,1,1];
+}
+def: InstRW<[SKXWriteGatherEVEX8], (instrs VGATHERDPSZ256rm, VPGATHERDDZ256rm,
+ VGATHERDPDZrm, VPGATHERDQZrm,
+ VGATHERQPDZrm, VPGATHERQQZrm,
+ VGATHERQPSZrm, VPGATHERQDZrm)>;
+
+def SKXWriteGatherEVEX16 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
+ let Latency = 25;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,16,1,1];
+}
+def: InstRW<[SKXWriteGatherEVEX16], (instrs VGATHERDPSZrm, VPGATHERDDZrm)>;
def SKXWriteResGroup219 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
let Latency = 20;
@@ -2205,57 +2223,31 @@ def SKXWriteResGroup223 : SchedWriteRes<[SKXPort0,SKXPort23]> {
}
def: InstRW<[SKXWriteResGroup223], (instregex "DIV_F(32|64)m")>;
-def SKXWriteResGroup224 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
- let Latency = 22;
- let NumMicroOps = 5;
+def SKXWriteResGroupVEX2 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
+ let Latency = 18;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
let ResourceCycles = [1,2,1,1];
}
-def: InstRW<[SKXWriteResGroup224], (instrs VGATHERDPDZ128rm,
- VGATHERQPDZ128rm,
- VPGATHERDQZ128rm,
- VPGATHERQQZ128rm)>;
+def: InstRW<[SKXWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm,
+ VGATHERQPDrm, VPGATHERQQrm,
+ VGATHERQPSrm, VPGATHERQDrm)>;
-def SKXWriteResGroup224_2 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
- let Latency = 22;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
+def SKXWriteResGroupVEX4 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
+ let Latency = 20;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,4,1,1];
}
-def: InstRW<[SKXWriteResGroup224_2], (instrs VGATHERDPSrm,
- VGATHERDPDrm,
- VGATHERQPDrm,
- VGATHERQPSrm,
- VPGATHERDDrm,
- VPGATHERDQrm,
- VPGATHERQDrm,
- VPGATHERQQrm,
- VPGATHERDDrm,
- VPGATHERQDrm,
- VPGATHERDQrm,
- VPGATHERQQrm,
- VGATHERDPSrm,
- VGATHERQPSrm,
- VGATHERDPDrm,
- VGATHERQPDrm)>;
-
-def SKXWriteResGroup224_3 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
- let Latency = 25;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
+def: InstRW<[SKXWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
+ VGATHERDPSrm, VPGATHERDDrm,
+ VGATHERQPDYrm, VPGATHERQQYrm,
+ VGATHERQPSYrm, VPGATHERQDYrm)>;
+
+def SKXWriteResGroupVEX8 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
+ let Latency = 22;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,8,1,1];
}
-def: InstRW<[SKXWriteResGroup224_3], (instrs VGATHERDPSYrm,
- VGATHERQPDYrm,
- VGATHERQPSYrm,
- VPGATHERDDYrm,
- VPGATHERDQYrm,
- VPGATHERQDYrm,
- VPGATHERQQYrm,
- VPGATHERDDYrm,
- VPGATHERQDYrm,
- VPGATHERDQYrm,
- VPGATHERQQYrm,
- VGATHERDPSYrm,
- VGATHERQPSYrm,
- VGATHERDPDYrm)>;
+def: InstRW<[SKXWriteResGroupVEX8], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;
def SKXWriteResGroup225 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> {
let Latency = 22;
@@ -2279,27 +2271,6 @@ def SKXWriteResGroup233 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
}
def: InstRW<[SKXWriteResGroup233], (instregex "DIV_FI(16|32)m")>;
-def SKXWriteResGroup234 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
- let Latency = 25;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
-}
-def: InstRW<[SKXWriteResGroup234], (instrs VGATHERDPDZ256rm,
- VGATHERQPDZ256rm,
- VPGATHERDQZ256rm,
- VPGATHERQDZrm,
- VPGATHERQQZ256rm)>;
-
-def SKXWriteResGroup238 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
- let Latency = 26;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
-}
-def: InstRW<[SKXWriteResGroup238], (instrs VGATHERDPDZrm,
- VGATHERQPDZrm,
- VPGATHERDQZrm,
- VPGATHERQQZrm)>;
-
def SKXWriteResGroup239 : SchedWriteRes<[SKXPort0,SKXPort23]> {
let Latency = 27;
let NumMicroOps = 2;
@@ -2307,14 +2278,6 @@ def SKXWriteResGroup239 : SchedWriteRes<[SKXPort0,SKXPort23]> {
}
def: InstRW<[SKXWriteResGroup239], (instregex "DIVR_F(32|64)m")>;
-def SKXWriteResGroup240 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
- let Latency = 27;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
-}
-def: InstRW<[SKXWriteResGroup240], (instrs VGATHERDPSZ256rm,
- VPGATHERDDZ256rm)>;
-
def SKXWriteResGroup242 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
let Latency = 29;
let NumMicroOps = 15;
@@ -2329,14 +2292,6 @@ def SKXWriteResGroup243 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
}
def: InstRW<[SKXWriteResGroup243], (instregex "DIVR_FI(16|32)m")>;
-def SKXWriteResGroup245 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
- let Latency = 30;
- let NumMicroOps = 5;
- let ResourceCycles = [1,2,1,1];
-}
-def: InstRW<[SKXWriteResGroup245], (instrs VGATHERDPSZrm,
- VPGATHERDDZrm)>;
-
def SKXWriteResGroup247 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort06,SKXPort0156]> {
let Latency = 35;
let NumMicroOps = 23;
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s
index 1bbd0f5ed704..d232058859c5 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s
@@ -181,6 +181,11 @@ vpaddq %zmm16, %zmm17, %zmm19 {z}{k1}
vpaddq (%rax), %zmm17, %zmm19 {z}{k1}
vpaddq (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
+vpgatherdq (%rax,%ymm1,2), %zmm2 {k1}
+vpgatherdd (%rax,%zmm1,2), %zmm2 {k1}
+vpgatherqq (%rax,%zmm1,2), %zmm2 {k1}
+vpgatherqd (%rax,%zmm1,2), %ymm2 {k1}
+
vpmulld %zmm16, %zmm17, %zmm19
vpmulld (%rax), %zmm17, %zmm19
vpmulld (%rax){1to16}, %zmm17, %zmm19
@@ -686,6 +691,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 1 1 0.50 vpaddq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vpaddq (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vpaddq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 1 5 0.50 * vpgatherdq (%rax,%ymm1,2), %zmm2 {%k1}
+# CHECK-NEXT: 1 5 0.50 * vpgatherdd (%rax,%zmm1,2), %zmm2 {%k1}
+# CHECK-NEXT: 1 5 0.50 * vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1}
+# CHECK-NEXT: 1 5 0.50 * vpgatherqd (%rax,%zmm1,2), %ymm2 {%k1}
# CHECK-NEXT: 1 5 1.00 vpmulld %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 2 12 1.00 * vpmulld (%rax), %zmm17, %zmm19
# CHECK-NEXT: 2 12 1.00 * vpmulld (%rax){1to16}, %zmm17, %zmm19
@@ -999,7 +1008,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
-# CHECK-NEXT: - 1506.00 129.00 144.00 - 270.00 153.00 153.00
+# CHECK-NEXT: - 1506.00 129.00 144.00 - 270.00 155.00 155.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
@@ -1165,6 +1174,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - - 0.50 - 0.50 - - vpaddq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpaddq (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpaddq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - - - - - 0.50 0.50 vpgatherdq (%rax,%ymm1,2), %zmm2 {%k1}
+# CHECK-NEXT: - - - - - - 0.50 0.50 vpgatherdd (%rax,%zmm1,2), %zmm2 {%k1}
+# CHECK-NEXT: - - - - - - 0.50 0.50 vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1}
+# CHECK-NEXT: - - - - - - 0.50 0.50 vpgatherqd (%rax,%zmm1,2), %ymm2 {%k1}
# CHECK-NEXT: - - 1.00 - - - - - vpmulld %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmulld (%rax), %zmm17, %zmm19
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmulld (%rax){1to16}, %zmm17, %zmm19
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s
index 6ac3448c0195..6d43b8c54b40 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s
@@ -121,6 +121,16 @@ vdivps %ymm16, %ymm17, %ymm19 {z}{k1}
vdivps (%rax), %ymm17, %ymm19 {z}{k1}
vdivps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
+vgatherdpd (%rax,%xmm1,2), %ymm2 {k1}
+vgatherdps (%rax,%ymm1,2), %ymm2 {k1}
+vgatherqpd (%rax,%ymm1,2), %ymm2 {k1}
+vgatherqps (%rax,%ymm1,2), %xmm2 {k1}
+
+vgatherdpd (%rax,%xmm1,2), %xmm2 {k1}
+vgatherdps (%rax,%xmm1,2), %xmm2 {k1}
+vgatherqpd (%rax,%xmm1,2), %xmm2 {k1}
+vgatherqps (%rax,%xmm1,2), %xmm2 {k1}
+
vmaxpd %xmm16, %xmm17, %xmm19
vmaxpd (%rax), %xmm17, %xmm19
vmaxpd (%rax){1to2}, %xmm17, %xmm19
@@ -421,6 +431,16 @@ vpermq %ymm16, %ymm17, %ymm19 {z}{k1}
vpermq (%rax), %ymm17, %ymm19 {z}{k1}
vpermq (%rax){1to4}, %ymm17, %ymm19 {z}{k1}
+vpgatherdq (%rax,%xmm1,2), %ymm2 {k1}
+vpgatherdd (%rax,%ymm1,2), %ymm2 {k1}
+vpgatherqq (%rax,%ymm1,2), %ymm2 {k1}
+vpgatherqd (%rax,%ymm1,2), %xmm2 {k1}
+
+vpgatherdq (%rax,%xmm1,2), %xmm2 {k1}
+vpgatherdd (%rax,%xmm1,2), %xmm2 {k1}
+vpgatherqq (%rax,%xmm1,2), %xmm2 {k1}
+vpgatherqd (%rax,%xmm1,2), %xmm2 {k1}
+
vpmulld %xmm16, %xmm17, %xmm19
vpmulld (%rax), %xmm17, %xmm19
vpmulld (%rax){1to4}, %xmm17, %xmm19
@@ -858,6 +878,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: 3 29 28.00 vdivps %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 4 36 28.00 * vdivps (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 4 36 28.00 * vdivps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 1 5 0.50 * vgatherdpd (%rax,%xmm1,2), %ymm2 {%k1}
+# CHECK-NEXT: 1 5 0.50 * vgatherdps (%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT: 1 5 0.50 * vgatherqpd (%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT: 1 5 0.50 * vgatherqps (%rax,%ymm1,2), %xmm2 {%k1}
+# CHECK-NEXT: 1 5 0.50 * vgatherdpd (%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT: 1 5 0.50 * vgatherdps (%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT: 1 5 0.50 * vgatherqpd (%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT: 1 5 0.50 * vgatherqps (%rax,%xmm1,2), %xmm2 {%k1}
# CHECK-NEXT: 1 3 1.00 vmaxpd %xmm16, %xmm17, %xmm19
# CHECK-NEXT: 2 9 1.00 * vmaxpd (%rax), %xmm17, %xmm19
# CHECK-NEXT: 2 9 1.00 * vmaxpd (%rax){1to2}, %xmm17, %xmm19
@@ -1128,6 +1156,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: 1 1 1.00 vpermq %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 2 8 1.00 * vpermq (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 2 8 1.00 * vpermq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 1 5 0.50 * vpgatherdq (%rax,%xmm1,2), %ymm2 {%k1}
+# CHECK-NEXT: 1 5 0.50 * vpgatherdd (%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT: 1 5 0.50 * vpgatherqq (%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT: 1 5 0.50 * vpgatherqd (%rax,%ymm1,2), %xmm2 {%k1}
+# CHECK-NEXT: 1 5 0.50 * vpgatherdq (%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT: 1 5 0.50 * vpgatherdd (%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT: 1 5 0.50 * vpgatherqq (%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT: 1 5 0.50 * vpgatherqd (%rax,%xmm1,2), %xmm2 {%k1}
# CHECK-NEXT: 1 5 1.00 vpmulld %xmm16, %xmm17, %xmm19
# CHECK-NEXT: 2 11 1.00 * vpmulld (%rax), %xmm17, %xmm19
# CHECK-NEXT: 2 11 1.00 * vpmulld (%rax){1to4}, %xmm17, %xmm19
@@ -1429,7 +1465,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
-# CHECK-NEXT: - 1935.00 180.00 229.50 - 346.50 222.00 222.00
+# CHECK-NEXT: - 1935.00 180.00 229.50 - 346.50 230.00 230.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
@@ -1541,6 +1577,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: - 28.00 2.50 - - 0.50 - - vdivps %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - 28.00 2.50 - - 0.50 0.50 0.50 vdivps (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - 28.00 2.50 - - 0.50 0.50 0.50 vdivps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: - - - - - - 0.50 0.50 vgatherdpd (%rax,%xmm1,2), %ymm2 {%k1}
+# CHECK-NEXT: - - - - - - 0.50 0.50 vgatherdps (%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT: - - - - - - 0.50 0.50 vgatherqpd (%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT: - - - - - - 0.50 0.50 vgatherqps (%rax,%ymm1,2), %xmm2 {%k1}
+# CHECK-NEXT: - - - - - - 0.50 0.50 vgatherdpd (%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT: - - - - - - 0.50 0.50 vgatherdps (%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT: - - - - - - 0.50 0.50 vgatherqpd (%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT: - - - - - - 0.50 0.50 vgatherqps (%rax,%xmm1,2), %xmm2 {%k1}
# CHECK-NEXT: - - - 1.00 - - - - vmaxpd %xmm16, %xmm17, %xmm19
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vmaxpd (%rax), %xmm17, %xmm19
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vmaxpd (%rax){1to2}, %xmm17, %xmm19
@@ -1811,6 +1855,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: - - - - - 1.00 - - vpermq %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermq (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: - - - - - - 0.50 0.50 vpgatherdq (%rax,%xmm1,2), %ymm2 {%k1}
+# CHECK-NEXT: - - - - - - 0.50 0.50 vpgatherdd (%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT: - - - - - - 0.50 0.50 vpgatherqq (%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT: - - - - - - 0.50 0.50 vpgatherqd (%rax,%ymm1,2), %xmm2 {%k1}
+# CHECK-NEXT: - - - - - - 0.50 0.50 vpgatherdq (%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT: - - - - - - 0.50 0.50 vpgatherdd (%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT: - - - - - - 0.50 0.50 vpgatherqq (%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT: - - - - - - 0.50 0.50 vpgatherqd (%rax,%xmm1,2), %xmm2 {%k1}
# CHECK-NEXT: - - 1.00 - - - - - vpmulld %xmm16, %xmm17, %xmm19
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmulld (%rax), %xmm17, %xmm19
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmulld (%rax){1to4}, %xmm17, %xmm19
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx2.s
index 78e7ac0d8769..9f6b518da9d8 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx2.s
@@ -465,14 +465,14 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vbroadcastss %xmm0, %ymm0
# CHECK-NEXT: 1 3 1.00 vextracti128 $1, %ymm0, %xmm2
# CHECK-NEXT: 2 1 1.00 * vextracti128 $1, %ymm0, (%rax)
-# CHECK-NEXT: 5 22 1.00 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: 5 25 1.00 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
-# CHECK-NEXT: 5 22 1.00 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: 5 25 1.00 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
-# CHECK-NEXT: 5 22 1.00 * vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: 5 25 1.00 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
-# CHECK-NEXT: 5 22 1.00 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: 5 25 1.00 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
+# CHECK-NEXT: 5 18 1.00 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT: 5 20 2.00 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
+# CHECK-NEXT: 5 20 2.00 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT: 5 22 4.00 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT: 5 18 1.00 * vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT: 5 20 2.00 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT: 5 18 1.00 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT: 5 20 2.00 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: 1 3 1.00 vinserti128 $1, %xmm0, %ymm1, %ymm2
# CHECK-NEXT: 2 7 0.50 * vinserti128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 7 0.50 * vmovntdqa (%rax), %ymm0
@@ -568,14 +568,14 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 10 1.00 * vpermps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vpermq $1, %ymm0, %ymm2
# CHECK-NEXT: 2 10 1.00 * vpermq $1, (%rax), %ymm2
-# CHECK-NEXT: 5 22 1.00 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: 5 25 1.00 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
-# CHECK-NEXT: 5 22 1.00 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: 5 25 1.00 * vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
-# CHECK-NEXT: 5 22 1.00 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: 5 25 1.00 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
-# CHECK-NEXT: 5 22 1.00 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: 5 25 1.00 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT: 5 20 2.00 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT: 5 22 4.00 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT: 5 18 1.00 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT: 5 20 2.00 * vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
+# CHECK-NEXT: 5 18 1.00 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT: 5 20 2.00 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
+# CHECK-NEXT: 5 18 1.00 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT: 5 20 2.00 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 3 3 2.00 vphaddd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 4 10 2.00 * vphaddd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 3 3 2.00 vphaddsw %ymm0, %ymm1, %ymm2
@@ -776,7 +776,7 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - - 110.33 89.33 85.67 85.67 1.00 164.33 - 1.67
+# CHECK-NEXT: - - 110.33 89.33 99.67 99.67 1.00 164.33 - 1.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -786,13 +786,13 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - vextracti128 $1, %ymm0, %xmm2
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextracti128 $1, %ymm0, (%rax)
# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
-# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
+# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT: - - 1.33 0.33 4.00 4.00 - 1.33 - - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
+# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - vinserti128 $1, %xmm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vinserti128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vmovntdqa (%rax), %ymm0
@@ -888,14 +888,14 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - vpermq $1, %ymm0, %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermq $1, (%rax), %ymm2
-# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT: - - 1.33 0.33 4.00 4.00 - 1.33 - - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
+# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
+# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vphaddd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vphaddd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vphaddsw %ymm0, %ymm1, %ymm2
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s
index 7d5257c2699e..f9c51a18ff7d 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s
@@ -465,14 +465,14 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vbroadcastss %xmm0, %ymm0
# CHECK-NEXT: 1 3 1.00 vextracti128 $1, %ymm0, %xmm2
# CHECK-NEXT: 2 1 1.00 * vextracti128 $1, %ymm0, (%rax)
-# CHECK-NEXT: 5 22 1.00 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: 5 25 1.00 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
-# CHECK-NEXT: 5 22 1.00 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: 5 25 1.00 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
-# CHECK-NEXT: 5 22 1.00 * vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: 5 25 1.00 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
-# CHECK-NEXT: 5 22 1.00 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: 5 25 1.00 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
+# CHECK-NEXT: 5 18 1.00 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT: 5 20 2.00 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
+# CHECK-NEXT: 5 20 2.00 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT: 5 22 4.00 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT: 5 18 1.00 * vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT: 5 20 2.00 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT: 5 18 1.00 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT: 5 20 2.00 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: 1 3 1.00 vinserti128 $1, %xmm0, %ymm1, %ymm2
# CHECK-NEXT: 2 7 0.50 * vinserti128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 7 0.50 * vmovntdqa (%rax), %ymm0
@@ -568,14 +568,14 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 10 1.00 * vpermps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vpermq $1, %ymm0, %ymm2
# CHECK-NEXT: 2 10 1.00 * vpermq $1, (%rax), %ymm2
-# CHECK-NEXT: 5 22 1.00 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: 5 25 1.00 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
-# CHECK-NEXT: 5 22 1.00 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: 5 25 1.00 * vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
-# CHECK-NEXT: 5 22 1.00 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: 5 25 1.00 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
-# CHECK-NEXT: 5 22 1.00 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: 5 25 1.00 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT: 5 20 2.00 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT: 5 22 4.00 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT: 5 18 1.00 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT: 5 20 2.00 * vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
+# CHECK-NEXT: 5 18 1.00 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT: 5 20 2.00 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
+# CHECK-NEXT: 5 18 1.00 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT: 5 20 2.00 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 3 3 2.00 vphaddd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 4 10 2.00 * vphaddd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 3 3 2.00 vphaddsw %ymm0, %ymm1, %ymm2
@@ -776,7 +776,7 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - - 110.33 89.33 85.67 85.67 1.00 164.33 - 1.67
+# CHECK-NEXT: - - 110.33 89.33 99.67 99.67 1.00 164.33 - 1.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -786,13 +786,13 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - vextracti128 $1, %ymm0, %xmm2
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextracti128 $1, %ymm0, (%rax)
# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
-# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
+# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT: - - 1.33 0.33 4.00 4.00 - 1.33 - - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
+# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 - - vinserti128 $1, %xmm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vinserti128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - vmovntdqa (%rax), %ymm0
@@ -888,14 +888,14 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - vpermq $1, %ymm0, %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermq $1, (%rax), %ymm2
-# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT: - - 1.33 0.33 4.00 4.00 - 1.33 - - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
+# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
+# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2
# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vphaddd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vphaddd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vphaddsw %ymm0, %ymm1, %ymm2
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
index 957fa6677c1a..e99030b7d439 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
@@ -81,6 +81,11 @@ vdivps %zmm16, %zmm17, %zmm19 {z}{k1}
vdivps (%rax), %zmm17, %zmm19 {z}{k1}
vdivps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
+vgatherdpd (%rax,%ymm1,2), %zmm2 {k1}
+vgatherdps (%rax,%zmm1,2), %zmm2 {k1}
+vgatherqpd (%rax,%zmm1,2), %zmm2 {k1}
+vgatherqps (%rax,%zmm1,2), %ymm2 {k1}
+
vmaxpd %zmm16, %zmm17, %zmm19
vmaxpd (%rax), %zmm17, %zmm19
vmaxpd (%rax){1to8}, %zmm17, %zmm19
@@ -181,6 +186,11 @@ vpaddq %zmm16, %zmm17, %zmm19 {z}{k1}
vpaddq (%rax), %zmm17, %zmm19 {z}{k1}
vpaddq (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
+vpgatherdq (%rax,%ymm1,2), %zmm2 {k1}
+vpgatherdd (%rax,%zmm1,2), %zmm2 {k1}
+vpgatherqq (%rax,%zmm1,2), %zmm2 {k1}
+vpgatherqd (%rax,%zmm1,2), %ymm2 {k1}
+
vpmulld %zmm16, %zmm17, %zmm19
vpmulld (%rax), %zmm17, %zmm19
vpmulld (%rax){1to16}, %zmm17, %zmm19
@@ -596,6 +606,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 3 18 10.00 vdivps %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 4 25 10.00 * vdivps (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 4 25 10.00 * vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 5 21 4.00 * vgatherdpd (%rax,%ymm1,2), %zmm2 {%k1}
+# CHECK-NEXT: 5 25 8.00 * vgatherdps (%rax,%zmm1,2), %zmm2 {%k1}
+# CHECK-NEXT: 5 21 4.00 * vgatherqpd (%rax,%zmm1,2), %zmm2 {%k1}
+# CHECK-NEXT: 5 21 4.00 * vgatherqps (%rax,%zmm1,2), %ymm2 {%k1}
# CHECK-NEXT: 1 4 0.50 vmaxpd %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 2 11 0.50 * vmaxpd (%rax), %zmm17, %zmm19
# CHECK-NEXT: 2 11 0.50 * vmaxpd (%rax){1to8}, %zmm17, %zmm19
@@ -686,6 +700,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: 1 1 0.33 vpaddq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vpaddq (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: 2 8 0.50 * vpaddq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: 5 21 4.00 * vpgatherdq (%rax,%ymm1,2), %zmm2 {%k1}
+# CHECK-NEXT: 5 25 8.00 * vpgatherdd (%rax,%zmm1,2), %zmm2 {%k1}
+# CHECK-NEXT: 5 21 4.00 * vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1}
+# CHECK-NEXT: 5 21 4.00 * vpgatherqd (%rax,%zmm1,2), %ymm2 {%k1}
# CHECK-NEXT: 2 10 1.00 vpmulld %zmm16, %zmm17, %zmm19
# CHECK-NEXT: 3 17 1.00 * vpmulld (%rax), %zmm17, %zmm19
# CHECK-NEXT: 3 17 1.00 * vpmulld (%rax){1to16}, %zmm17, %zmm19
@@ -1001,7 +1019,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 612.00 188.00 26.00 153.00 153.00 - 330.00 - -
+# CHECK-NEXT: - 612.00 200.67 30.67 193.00 193.00 - 334.67 2.00 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1077,6 +1095,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - 10.00 2.00 - - - - 1.00 - - vdivps %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - 10.00 2.00 - 0.50 0.50 - 1.00 - - vdivps (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - 10.00 2.00 - 0.50 0.50 - 1.00 - - vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - vgatherdpd (%rax,%ymm1,2), %zmm2 {%k1}
+# CHECK-NEXT: - - 1.58 0.58 8.00 8.00 - 0.58 0.25 - vgatherdps (%rax,%zmm1,2), %zmm2 {%k1}
+# CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - vgatherqpd (%rax,%zmm1,2), %zmm2 {%k1}
+# CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - vgatherqps (%rax,%zmm1,2), %ymm2 {%k1}
# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmaxpd %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vmaxpd (%rax), %zmm17, %zmm19
# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vmaxpd (%rax){1to8}, %zmm17, %zmm19
@@ -1167,6 +1189,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpaddq %zmm16, %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpaddq (%rax), %zmm17, %zmm19 {%k1} {z}
# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpaddq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - vpgatherdq (%rax,%ymm1,2), %zmm2 {%k1}
+# CHECK-NEXT: - - 1.58 0.58 8.00 8.00 - 0.58 0.25 - vpgatherdd (%rax,%zmm1,2), %zmm2 {%k1}
+# CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1}
+# CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - vpgatherqd (%rax,%zmm1,2), %ymm2 {%k1}
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vpmulld %zmm16, %zmm17, %zmm19
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vpmulld (%rax), %zmm17, %zmm19
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vpmulld (%rax){1to16}, %zmm17, %zmm19
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s
index e96d501c243d..4b8db4f0f4d4 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s
@@ -121,6 +121,16 @@ vdivps %ymm16, %ymm17, %ymm19 {z}{k1}
vdivps (%rax), %ymm17, %ymm19 {z}{k1}
vdivps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
+vgatherdpd (%rax,%xmm1,2), %ymm2 {k1}
+vgatherdps (%rax,%ymm1,2), %ymm2 {k1}
+vgatherqpd (%rax,%ymm1,2), %ymm2 {k1}
+vgatherqps (%rax,%ymm1,2), %xmm2 {k1}
+
+vgatherdpd (%rax,%xmm1,2), %xmm2 {k1}
+vgatherdps (%rax,%xmm1,2), %xmm2 {k1}
+vgatherqpd (%rax,%xmm1,2), %xmm2 {k1}
+vgatherqps (%rax,%xmm1,2), %xmm2 {k1}
+
vmaxpd %xmm16, %xmm17, %xmm19
vmaxpd (%rax), %xmm17, %xmm19
vmaxpd (%rax){1to2}, %xmm17, %xmm19
@@ -421,6 +431,16 @@ vpermq %ymm16, %ymm17, %ymm19 {z}{k1}
vpermq (%rax), %ymm17, %ymm19 {z}{k1}
vpermq (%rax){1to4}, %ymm17, %ymm19 {z}{k1}
+vpgatherdq (%rax,%xmm1,2), %ymm2 {k1}
+vpgatherdd (%rax,%ymm1,2), %ymm2 {k1}
+vpgatherqq (%rax,%ymm1,2), %ymm2 {k1}
+vpgatherqd (%rax,%ymm1,2), %xmm2 {k1}
+
+vpgatherdq (%rax,%xmm1,2), %xmm2 {k1}
+vpgatherdd (%rax,%xmm1,2), %xmm2 {k1}
+vpgatherqq (%rax,%xmm1,2), %xmm2 {k1}
+vpgatherqd (%rax,%xmm1,2), %xmm2 {k1}
+
vpmulld %xmm16, %xmm17, %xmm19
vpmulld (%rax), %xmm17, %xmm19
vpmulld (%rax){1to4}, %xmm17, %xmm19
@@ -858,6 +878,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: 1 11 5.00 vdivps %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 2 18 5.00 * vdivps (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 2 18 5.00 * vdivps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 5 19 2.00 * vgatherdpd (%rax,%xmm1,2), %ymm2 {%k1}
+# CHECK-NEXT: 5 21 4.00 * vgatherdps (%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT: 5 19 2.00 * vgatherqpd (%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT: 5 19 2.00 * vgatherqps (%rax,%ymm1,2), %xmm2 {%k1}
+# CHECK-NEXT: 5 17 1.00 * vgatherdpd (%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT: 5 19 2.00 * vgatherdps (%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT: 5 17 1.00 * vgatherqpd (%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT: 5 17 1.00 * vgatherqps (%rax,%xmm1,2), %xmm2 {%k1}
# CHECK-NEXT: 1 4 0.50 vmaxpd %xmm16, %xmm17, %xmm19
# CHECK-NEXT: 2 10 0.50 * vmaxpd (%rax), %xmm17, %xmm19
# CHECK-NEXT: 2 10 0.50 * vmaxpd (%rax){1to2}, %xmm17, %xmm19
@@ -1128,6 +1156,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: 1 3 1.00 vpermq %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 2 10 1.00 * vpermq (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: 2 10 1.00 * vpermq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: 5 19 2.00 * vpgatherdq (%rax,%xmm1,2), %ymm2 {%k1}
+# CHECK-NEXT: 5 21 4.00 * vpgatherdd (%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT: 5 19 2.00 * vpgatherqq (%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT: 5 19 2.00 * vpgatherqd (%rax,%ymm1,2), %xmm2 {%k1}
+# CHECK-NEXT: 5 17 1.00 * vpgatherdq (%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT: 5 19 2.00 * vpgatherdd (%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT: 5 17 1.00 * vpgatherqq (%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT: 5 17 1.00 * vpgatherqd (%rax,%xmm1,2), %xmm2 {%k1}
# CHECK-NEXT: 2 10 1.00 vpmulld %xmm16, %xmm17, %xmm19
# CHECK-NEXT: 3 16 1.00 * vpmulld (%rax), %xmm17, %xmm19
# CHECK-NEXT: 3 16 1.00 * vpmulld (%rax){1to4}, %xmm17, %xmm19
@@ -1431,7 +1467,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 423.00 217.00 145.00 222.00 222.00 - 328.00 - -
+# CHECK-NEXT: - 423.00 242.33 154.33 252.00 252.00 - 337.33 4.00 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1543,6 +1579,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: - 5.00 1.00 - - - - - - - vdivps %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - 5.00 1.00 - 0.50 0.50 - - - - vdivps (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - 5.00 1.00 - 0.50 0.50 - - - - vdivps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: - - 1.58 0.58 2.00 2.00 - 0.58 0.25 - vgatherdpd (%rax,%xmm1,2), %ymm2 {%k1}
+# CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - vgatherdps (%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT: - - 1.58 0.58 2.00 2.00 - 0.58 0.25 - vgatherqpd (%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT: - - 1.58 0.58 2.00 2.00 - 0.58 0.25 - vgatherqps (%rax,%ymm1,2), %xmm2 {%k1}
+# CHECK-NEXT: - - 1.58 0.58 1.00 1.00 - 0.58 0.25 - vgatherdpd (%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT: - - 1.58 0.58 2.00 2.00 - 0.58 0.25 - vgatherdps (%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT: - - 1.58 0.58 1.00 1.00 - 0.58 0.25 - vgatherqpd (%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT: - - 1.58 0.58 1.00 1.00 - 0.58 0.25 - vgatherqps (%rax,%xmm1,2), %xmm2 {%k1}
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxpd %xmm16, %xmm17, %xmm19
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxpd (%rax), %xmm17, %xmm19
# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxpd (%rax){1to2}, %xmm17, %xmm19
@@ -1813,6 +1857,14 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
# CHECK-NEXT: - - - - - - - 1.00 - - vpermq %ymm16, %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermq (%rax), %ymm17, %ymm19 {%k1} {z}
# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT: - - 1.58 0.58 2.00 2.00 - 0.58 0.25 - vpgatherdq (%rax,%xmm1,2), %ymm2 {%k1}
+# CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - vpgatherdd (%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT: - - 1.58 0.58 2.00 2.00 - 0.58 0.25 - vpgatherqq (%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT: - - 1.58 0.58 2.00 2.00 - 0.58 0.25 - vpgatherqd (%rax,%ymm1,2), %xmm2 {%k1}
+# CHECK-NEXT: - - 1.58 0.58 1.00 1.00 - 0.58 0.25 - vpgatherdq (%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT: - - 1.58 0.58 2.00 2.00 - 0.58 0.25 - vpgatherdd (%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT: - - 1.58 0.58 1.00 1.00 - 0.58 0.25 - vpgatherqq (%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT: - - 1.58 0.58 1.00 1.00 - 0.58 0.25 - vpgatherqd (%rax,%xmm1,2), %xmm2 {%k1}
# CHECK-NEXT: - - 1.00 1.00 - - - - - - vpmulld %xmm16, %xmm17, %xmm19
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vpmulld (%rax), %xmm17, %xmm19
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vpmulld (%rax){1to4}, %xmm17, %xmm19
More information about the llvm-commits
mailing list