[llvm] c6bdd8e - [X86] Improve the gather scheduler models for SkylakeClient and SkylakeServer

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 5 13:27:06 PST 2020


Author: Craig Topper
Date: 2020-02-05T13:26:47-08:00
New Revision: c6bdd8e73110e14dc54833137cecef9c07d2dc24

URL: https://github.com/llvm/llvm-project/commit/c6bdd8e73110e14dc54833137cecef9c07d2dc24
DIFF: https://github.com/llvm/llvm-project/commit/c6bdd8e73110e14dc54833137cecef9c07d2dc24.diff

LOG: [X86] Improve the gather scheduler models for SkylakeClient and SkylakeServer

The load ports need a cycle for each potentially loaded element just like Haswell and Skylake. Unlike Haswell and Broadwell, the number of uops does not scale with the number of elements. Instead the load uops run for multiple cycles.

I've taken the latency number from the uops.info. The port binding for the non-load uops is taken from the original IACA data I have.

Differential Revision: https://reviews.llvm.org/D74000

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86SchedSkylakeClient.td
    llvm/lib/Target/X86/X86SchedSkylakeServer.td
    llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s
    llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s
    llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx2.s
    llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s
    llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
    llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 4dc3b199b601..0950203801af 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -1593,33 +1593,31 @@ def SKLWriteResGroup196 : SchedWriteRes<[SKLPort0,SKLPort23]> {
 }
 def: InstRW<[SKLWriteResGroup196], (instregex "DIV_F(32|64)m")>;
 
-def SKLWriteResGroup196_1 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> {
-  let Latency = 22;
-  let NumMicroOps = 5;
+def SKLWriteResGroupVEX2 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> {
+  let Latency = 18;
+  let NumMicroOps = 5; // 2 uops perform multiple loads
   let ResourceCycles = [1,2,1,1];
 }
-def: InstRW<[SKLWriteResGroup196_1], (instrs VGATHERDPSrm,
-                                             VGATHERDPDrm,
-                                             VGATHERQPDrm,
-                                             VGATHERQPSrm,
-                                             VPGATHERDDrm,
-                                             VPGATHERDQrm,
-                                             VPGATHERQDrm,
-                                             VPGATHERQQrm)>;
+def: InstRW<[SKLWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm,
+                                            VGATHERQPDrm, VPGATHERQQrm,
+                                            VGATHERQPSrm, VPGATHERQDrm)>;
 
-def SKLWriteResGroup196_2 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> {
-  let Latency = 25;
-  let NumMicroOps = 5;
-  let ResourceCycles = [1,2,1,1];
+def SKLWriteResGroupVEX4 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> {
+  let Latency = 20;
+  let NumMicroOps = 5; // 2 uops peform multiple loads
+  let ResourceCycles = [1,4,1,1];
+}
+def: InstRW<[SKLWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
+                                            VGATHERDPSrm,  VPGATHERDDrm,
+                                            VGATHERQPDYrm, VPGATHERQQYrm,
+                                            VGATHERQPSYrm,  VPGATHERQDYrm)>;
+
+def SKLWriteResGroupVEX8 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> {
+  let Latency = 22;
+  let NumMicroOps = 5; // 2 uops perform multiple loads
+  let ResourceCycles = [1,8,1,1];
 }
-def: InstRW<[SKLWriteResGroup196_2], (instrs VGATHERDPSYrm,
-                                             VGATHERQPDYrm,
-                                             VGATHERQPSYrm,
-                                             VPGATHERDDYrm,
-                                             VPGATHERDQYrm,
-                                             VPGATHERQDYrm,
-                                             VPGATHERQQYrm,
-                                             VGATHERDPDYrm)>;
+def: InstRW<[SKLWriteResGroupVEX8], (instrs VGATHERDPSYrm,  VPGATHERDDYrm)>;
 
 def SKLWriteResGroup198 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort5,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
   let Latency = 23;

diff  --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 3fdc8dda1d06..76c8f0dbac3d 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -2145,14 +2145,6 @@ def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort015]> {
 def: InstRW<[SKXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)",
                                               "VPMULLQZrm(b?)")>;
 
-def SKXWriteResGroup214 : SchedWriteRes<[]> {
-  let Latency = 20;
-  let NumMicroOps = 0;
-}
-def: InstRW<[SKXWriteResGroup214], (instrs VGATHERDPSZ128rm,
-                                           VGATHERQPSZrm,
-                                           VPGATHERDDZ128rm)>;
-
 def SKXWriteResGroup215 : SchedWriteRes<[SKXPort0]> {
   let Latency = 20;
   let NumMicroOps = 1;
@@ -2167,15 +2159,41 @@ def SKXWriteResGroup216 : SchedWriteRes<[SKXPort0,SKXPort23,SKXFPDivider]> {
 }
 def : SchedAlias<WriteFDiv64XLd, SKXWriteResGroup216>; // TODO - convert to ZnWriteResFpuPair
 
-def SKXWriteResGroup218 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
-  let Latency = 20;
-  let NumMicroOps = 5;
+def SKXWriteGatherEVEX2 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
+  let Latency = 17;
+  let NumMicroOps = 5; // 2 uops perform multiple loads
   let ResourceCycles = [1,2,1,1];
 }
-def: InstRW<[SKXWriteResGroup218], (instrs VGATHERQPSZ128rm,
-                                           VGATHERQPSZ256rm,
-                                           VPGATHERQDZ128rm,
-                                           VPGATHERQDZ256rm)>;
+def: InstRW<[SKXWriteGatherEVEX2], (instrs VGATHERQPSZ128rm, VPGATHERQDZ128rm,
+                                           VGATHERDPDZ128rm, VPGATHERDQZ128rm,
+                                           VGATHERQPDZ128rm, VPGATHERQQZ128rm)>;
+
+def SKXWriteGatherEVEX4 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
+  let Latency = 19;
+  let NumMicroOps = 5; // 2 uops perform multiple loads
+  let ResourceCycles = [1,4,1,1];
+}
+def: InstRW<[SKXWriteGatherEVEX4], (instrs VGATHERQPSZ256rm, VPGATHERQDZ256rm,
+                                           VGATHERQPDZ256rm, VPGATHERQQZ256rm,
+                                           VGATHERDPSZ128rm, VPGATHERDDZ128rm,
+                                           VGATHERDPDZ256rm, VPGATHERDQZ256rm)>;
+
+def SKXWriteGatherEVEX8 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
+  let Latency = 21;
+  let NumMicroOps = 5; // 2 uops perform multiple loads
+  let ResourceCycles = [1,8,1,1];
+}
+def: InstRW<[SKXWriteGatherEVEX8], (instrs VGATHERDPSZ256rm, VPGATHERDDZ256rm,
+                                           VGATHERDPDZrm,    VPGATHERDQZrm,
+                                           VGATHERQPDZrm,    VPGATHERQQZrm,
+                                           VGATHERQPSZrm,    VPGATHERQDZrm)>;
+
+def SKXWriteGatherEVEX16 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
+  let Latency = 25;
+  let NumMicroOps = 5; // 2 uops perform multiple loads
+  let ResourceCycles = [1,16,1,1];
+}
+def: InstRW<[SKXWriteGatherEVEX16], (instrs VGATHERDPSZrm, VPGATHERDDZrm)>;
 
 def SKXWriteResGroup219 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
   let Latency = 20;
@@ -2205,57 +2223,31 @@ def SKXWriteResGroup223 : SchedWriteRes<[SKXPort0,SKXPort23]> {
 }
 def: InstRW<[SKXWriteResGroup223], (instregex "DIV_F(32|64)m")>;
 
-def SKXWriteResGroup224 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
-  let Latency = 22;
-  let NumMicroOps = 5;
+def SKXWriteResGroupVEX2 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
+  let Latency = 18;
+  let NumMicroOps = 5; // 2 uops perform multiple loads
   let ResourceCycles = [1,2,1,1];
 }
-def: InstRW<[SKXWriteResGroup224], (instrs VGATHERDPDZ128rm,
-                                           VGATHERQPDZ128rm,
-                                           VPGATHERDQZ128rm,
-                                           VPGATHERQQZ128rm)>;
+def: InstRW<[SKXWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm,
+                                            VGATHERQPDrm, VPGATHERQQrm,
+                                            VGATHERQPSrm, VPGATHERQDrm)>;
 
-def SKXWriteResGroup224_2 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
-  let Latency = 22;
-  let NumMicroOps = 5;
-  let ResourceCycles = [1,2,1,1];
+def SKXWriteResGroupVEX4 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
+  let Latency = 20;
+  let NumMicroOps = 5; // 2 uops peform multiple loads
+  let ResourceCycles = [1,4,1,1];
 }
-def: InstRW<[SKXWriteResGroup224_2], (instrs VGATHERDPSrm,
-                                             VGATHERDPDrm,
-                                             VGATHERQPDrm,
-                                             VGATHERQPSrm,
-                                             VPGATHERDDrm,
-                                             VPGATHERDQrm,
-                                             VPGATHERQDrm,
-                                             VPGATHERQQrm,
-                                             VPGATHERDDrm,
-                                             VPGATHERQDrm,
-                                             VPGATHERDQrm,
-                                             VPGATHERQQrm,
-                                             VGATHERDPSrm,
-                                             VGATHERQPSrm,
-                                             VGATHERDPDrm,
-                                             VGATHERQPDrm)>;
-
-def SKXWriteResGroup224_3 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
-  let Latency = 25;
-  let NumMicroOps = 5;
-  let ResourceCycles = [1,2,1,1];
+def: InstRW<[SKXWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
+                                            VGATHERDPSrm,  VPGATHERDDrm,
+                                            VGATHERQPDYrm, VPGATHERQQYrm,
+                                            VGATHERQPSYrm,  VPGATHERQDYrm)>;
+
+def SKXWriteResGroupVEX8 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
+  let Latency = 22;
+  let NumMicroOps = 5; // 2 uops perform multiple loads
+  let ResourceCycles = [1,8,1,1];
 }
-def: InstRW<[SKXWriteResGroup224_3], (instrs VGATHERDPSYrm,
-                                             VGATHERQPDYrm,
-                                             VGATHERQPSYrm,
-                                             VPGATHERDDYrm,
-                                             VPGATHERDQYrm,
-                                             VPGATHERQDYrm,
-                                             VPGATHERQQYrm,
-                                             VPGATHERDDYrm,
-                                             VPGATHERQDYrm,
-                                             VPGATHERDQYrm,
-                                             VPGATHERQQYrm,
-                                             VGATHERDPSYrm,
-                                             VGATHERQPSYrm,
-                                             VGATHERDPDYrm)>;
+def: InstRW<[SKXWriteResGroupVEX8], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;
 
 def SKXWriteResGroup225 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> {
   let Latency = 22;
@@ -2279,27 +2271,6 @@ def SKXWriteResGroup233 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
 }
 def: InstRW<[SKXWriteResGroup233], (instregex "DIV_FI(16|32)m")>;
 
-def SKXWriteResGroup234 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
-  let Latency = 25;
-  let NumMicroOps = 5;
-  let ResourceCycles = [1,2,1,1];
-}
-def: InstRW<[SKXWriteResGroup234], (instrs VGATHERDPDZ256rm,
-                                           VGATHERQPDZ256rm,
-                                           VPGATHERDQZ256rm,
-                                           VPGATHERQDZrm,
-                                           VPGATHERQQZ256rm)>;
-
-def SKXWriteResGroup238 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
-  let Latency = 26;
-  let NumMicroOps = 5;
-  let ResourceCycles = [1,2,1,1];
-}
-def: InstRW<[SKXWriteResGroup238], (instrs VGATHERDPDZrm,
-                                           VGATHERQPDZrm,
-                                           VPGATHERDQZrm,
-                                           VPGATHERQQZrm)>;
-
 def SKXWriteResGroup239 : SchedWriteRes<[SKXPort0,SKXPort23]> {
   let Latency = 27;
   let NumMicroOps = 2;
@@ -2307,14 +2278,6 @@ def SKXWriteResGroup239 : SchedWriteRes<[SKXPort0,SKXPort23]> {
 }
 def: InstRW<[SKXWriteResGroup239], (instregex "DIVR_F(32|64)m")>;
 
-def SKXWriteResGroup240 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
-  let Latency = 27;
-  let NumMicroOps = 5;
-  let ResourceCycles = [1,2,1,1];
-}
-def: InstRW<[SKXWriteResGroup240], (instrs VGATHERDPSZ256rm,
-                                           VPGATHERDDZ256rm)>;
-
 def SKXWriteResGroup242 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
   let Latency = 29;
   let NumMicroOps = 15;
@@ -2329,14 +2292,6 @@ def SKXWriteResGroup243 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
 }
 def: InstRW<[SKXWriteResGroup243], (instregex "DIVR_FI(16|32)m")>;
 
-def SKXWriteResGroup245 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
-  let Latency = 30;
-  let NumMicroOps = 5;
-  let ResourceCycles = [1,2,1,1];
-}
-def: InstRW<[SKXWriteResGroup245], (instrs VGATHERDPSZrm,
-                                           VPGATHERDDZrm)>;
-
 def SKXWriteResGroup247 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort06,SKXPort0156]> {
   let Latency = 35;
   let NumMicroOps = 23;

diff  --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s
index 1bbd0f5ed704..d232058859c5 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s
@@ -181,6 +181,11 @@ vpaddq            %zmm16, %zmm17, %zmm19 {z}{k1}
 vpaddq            (%rax), %zmm17, %zmm19 {z}{k1}
 vpaddq            (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vpgatherdq        (%rax,%ymm1,2), %zmm2 {k1}
+vpgatherdd        (%rax,%zmm1,2), %zmm2 {k1}
+vpgatherqq        (%rax,%zmm1,2), %zmm2 {k1}
+vpgatherqd        (%rax,%zmm1,2), %ymm2 {k1}
+
 vpmulld           %zmm16, %zmm17, %zmm19
 vpmulld           (%rax), %zmm17, %zmm19
 vpmulld           (%rax){1to16}, %zmm17, %zmm19
@@ -686,6 +691,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      1     0.50                        vpaddq	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     0.50    *                   vpaddq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     0.50    *                   vpaddq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  1      5     0.50    *                   vpgatherdq	(%rax,%ymm1,2), %zmm2 {%k1}
+# CHECK-NEXT:  1      5     0.50    *                   vpgatherdd	(%rax,%zmm1,2), %zmm2 {%k1}
+# CHECK-NEXT:  1      5     0.50    *                   vpgatherqq	(%rax,%zmm1,2), %zmm2 {%k1}
+# CHECK-NEXT:  1      5     0.50    *                   vpgatherqd	(%rax,%zmm1,2), %ymm2 {%k1}
 # CHECK-NEXT:  1      5     1.00                        vpmulld	%zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  2      12    1.00    *                   vpmulld	(%rax), %zmm17, %zmm19
 # CHECK-NEXT:  2      12    1.00    *                   vpmulld	(%rax){1to16}, %zmm17, %zmm19
@@ -999,7 +1008,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -     1506.00 129.00 144.00  -    270.00 153.00 153.00
+# CHECK-NEXT:  -     1506.00 129.00 144.00  -    270.00 155.00 155.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
@@ -1165,6 +1174,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -     0.50    -     0.50    -      -     vpaddq	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -     0.50    -     0.50   0.50   0.50   vpaddq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -     0.50    -     0.50   0.50   0.50   vpaddq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vpgatherdq	(%rax,%ymm1,2), %zmm2 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vpgatherdd	(%rax,%zmm1,2), %zmm2 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vpgatherqq	(%rax,%zmm1,2), %zmm2 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vpgatherqd	(%rax,%zmm1,2), %ymm2 {%k1}
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vpmulld	%zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vpmulld	(%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vpmulld	(%rax){1to16}, %zmm17, %zmm19

diff  --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s
index 6ac3448c0195..6d43b8c54b40 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s
@@ -121,6 +121,16 @@ vdivps            %ymm16, %ymm17, %ymm19 {z}{k1}
 vdivps            (%rax), %ymm17, %ymm19 {z}{k1}
 vdivps            (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vgatherdpd        (%rax,%xmm1,2), %ymm2 {k1}
+vgatherdps        (%rax,%ymm1,2), %ymm2 {k1}
+vgatherqpd        (%rax,%ymm1,2), %ymm2 {k1}
+vgatherqps        (%rax,%ymm1,2), %xmm2 {k1}
+
+vgatherdpd        (%rax,%xmm1,2), %xmm2 {k1}
+vgatherdps        (%rax,%xmm1,2), %xmm2 {k1}
+vgatherqpd        (%rax,%xmm1,2), %xmm2 {k1}
+vgatherqps        (%rax,%xmm1,2), %xmm2 {k1}
+
 vmaxpd            %xmm16, %xmm17, %xmm19
 vmaxpd            (%rax), %xmm17, %xmm19
 vmaxpd            (%rax){1to2}, %xmm17, %xmm19
@@ -421,6 +431,16 @@ vpermq            %ymm16, %ymm17, %ymm19 {z}{k1}
 vpermq            (%rax), %ymm17, %ymm19 {z}{k1}
 vpermq            (%rax){1to4}, %ymm17, %ymm19 {z}{k1}
 
+vpgatherdq        (%rax,%xmm1,2), %ymm2 {k1}
+vpgatherdd        (%rax,%ymm1,2), %ymm2 {k1}
+vpgatherqq        (%rax,%ymm1,2), %ymm2 {k1}
+vpgatherqd        (%rax,%ymm1,2), %xmm2 {k1}
+
+vpgatherdq        (%rax,%xmm1,2), %xmm2 {k1}
+vpgatherdd        (%rax,%xmm1,2), %xmm2 {k1}
+vpgatherqq        (%rax,%xmm1,2), %xmm2 {k1}
+vpgatherqd        (%rax,%xmm1,2), %xmm2 {k1}
+
 vpmulld           %xmm16, %xmm17, %xmm19
 vpmulld           (%rax), %xmm17, %xmm19
 vpmulld           (%rax){1to4}, %xmm17, %xmm19
@@ -858,6 +878,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  3      29    28.00                       vdivps	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  4      36    28.00   *                   vdivps	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  4      36    28.00   *                   vdivps	(%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      5     0.50    *                   vgatherdpd	(%rax,%xmm1,2), %ymm2 {%k1}
+# CHECK-NEXT:  1      5     0.50    *                   vgatherdps	(%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT:  1      5     0.50    *                   vgatherqpd	(%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT:  1      5     0.50    *                   vgatherqps	(%rax,%ymm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  1      5     0.50    *                   vgatherdpd	(%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  1      5     0.50    *                   vgatherdps	(%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  1      5     0.50    *                   vgatherqpd	(%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  1      5     0.50    *                   vgatherqps	(%rax,%xmm1,2), %xmm2 {%k1}
 # CHECK-NEXT:  1      3     1.00                        vmaxpd	%xmm16, %xmm17, %xmm19
 # CHECK-NEXT:  2      9     1.00    *                   vmaxpd	(%rax), %xmm17, %xmm19
 # CHECK-NEXT:  2      9     1.00    *                   vmaxpd	(%rax){1to2}, %xmm17, %xmm19
@@ -1128,6 +1156,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      1     1.00                        vpermq	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpermq	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      8     1.00    *                   vpermq	(%rax){1to4}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  1      5     0.50    *                   vpgatherdq	(%rax,%xmm1,2), %ymm2 {%k1}
+# CHECK-NEXT:  1      5     0.50    *                   vpgatherdd	(%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT:  1      5     0.50    *                   vpgatherqq	(%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT:  1      5     0.50    *                   vpgatherqd	(%rax,%ymm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  1      5     0.50    *                   vpgatherdq	(%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  1      5     0.50    *                   vpgatherdd	(%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  1      5     0.50    *                   vpgatherqq	(%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  1      5     0.50    *                   vpgatherqd	(%rax,%xmm1,2), %xmm2 {%k1}
 # CHECK-NEXT:  1      5     1.00                        vpmulld	%xmm16, %xmm17, %xmm19
 # CHECK-NEXT:  2      11    1.00    *                   vpmulld	(%rax), %xmm17, %xmm19
 # CHECK-NEXT:  2      11    1.00    *                   vpmulld	(%rax){1to4}, %xmm17, %xmm19
@@ -1429,7 +1465,7 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -     1935.00 180.00 229.50  -    346.50 222.00 222.00
+# CHECK-NEXT:  -     1935.00 180.00 229.50  -    346.50 230.00 230.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
@@ -1541,6 +1577,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -     28.00  2.50    -      -     0.50    -      -     vdivps	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -     28.00  2.50    -      -     0.50   0.50   0.50   vdivps	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -     28.00  2.50    -      -     0.50   0.50   0.50   vdivps	(%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vgatherdpd	(%rax,%xmm1,2), %ymm2 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vgatherdps	(%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vgatherqpd	(%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vgatherqps	(%rax,%ymm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vgatherdpd	(%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vgatherdps	(%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vgatherqpd	(%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vgatherqps	(%rax,%xmm1,2), %xmm2 {%k1}
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vmaxpd	%xmm16, %xmm17, %xmm19
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vmaxpd	(%rax), %xmm17, %xmm19
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vmaxpd	(%rax){1to2}, %xmm17, %xmm19
@@ -1811,6 +1855,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -     1.00    -      -     vpermq	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vpermq	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -      -     1.00   0.50   0.50   vpermq	(%rax){1to4}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vpgatherdq	(%rax,%xmm1,2), %ymm2 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vpgatherdd	(%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vpgatherqq	(%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vpgatherqd	(%rax,%ymm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vpgatherdq	(%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vpgatherdd	(%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vpgatherqq	(%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  -      -      -      -      -      -     0.50   0.50   vpgatherqd	(%rax,%xmm1,2), %xmm2 {%k1}
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vpmulld	%xmm16, %xmm17, %xmm19
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vpmulld	(%rax), %xmm17, %xmm19
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vpmulld	(%rax){1to4}, %xmm17, %xmm19

diff  --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx2.s
index 78e7ac0d8769..9f6b518da9d8 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx2.s
@@ -465,14 +465,14 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      3     1.00                        vbroadcastss	%xmm0, %ymm0
 # CHECK-NEXT:  1      3     1.00                        vextracti128	$1, %ymm0, %xmm2
 # CHECK-NEXT:  2      1     1.00           *            vextracti128	$1, %ymm0, (%rax)
-# CHECK-NEXT:  5      22    1.00    *                   vgatherdpd	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  5      25    1.00    *                   vgatherdpd	%ymm0, (%rax,%xmm1,2), %ymm2
-# CHECK-NEXT:  5      22    1.00    *                   vgatherdps	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  5      25    1.00    *                   vgatherdps	%ymm0, (%rax,%ymm1,2), %ymm2
-# CHECK-NEXT:  5      22    1.00    *                   vgatherqpd	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  5      25    1.00    *                   vgatherqpd	%ymm0, (%rax,%ymm1,2), %ymm2
-# CHECK-NEXT:  5      22    1.00    *                   vgatherqps	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  5      25    1.00    *                   vgatherqps	%xmm0, (%rax,%ymm1,2), %xmm2
+# CHECK-NEXT:  5      18    1.00    *                   vgatherdpd	%xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT:  5      20    2.00    *                   vgatherdpd	%ymm0, (%rax,%xmm1,2), %ymm2
+# CHECK-NEXT:  5      20    2.00    *                   vgatherdps	%xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT:  5      22    4.00    *                   vgatherdps	%ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT:  5      18    1.00    *                   vgatherqpd	%xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT:  5      20    2.00    *                   vgatherqpd	%ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT:  5      18    1.00    *                   vgatherqps	%xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT:  5      20    2.00    *                   vgatherqps	%xmm0, (%rax,%ymm1,2), %xmm2
 # CHECK-NEXT:  1      3     1.00                        vinserti128	$1, %xmm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      7     0.50    *                   vinserti128	$1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      7     0.50    *                   vmovntdqa	(%rax), %ymm0
@@ -568,14 +568,14 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      10    1.00    *                   vpermps	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      3     1.00                        vpermq	$1, %ymm0, %ymm2
 # CHECK-NEXT:  2      10    1.00    *                   vpermq	$1, (%rax), %ymm2
-# CHECK-NEXT:  5      22    1.00    *                   vpgatherdd	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  5      25    1.00    *                   vpgatherdd	%ymm0, (%rax,%ymm1,2), %ymm2
-# CHECK-NEXT:  5      22    1.00    *                   vpgatherdq	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  5      25    1.00    *                   vpgatherdq	%ymm0, (%rax,%xmm1,2), %ymm2
-# CHECK-NEXT:  5      22    1.00    *                   vpgatherqd	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  5      25    1.00    *                   vpgatherqd	%xmm0, (%rax,%ymm1,2), %xmm2
-# CHECK-NEXT:  5      22    1.00    *                   vpgatherqq	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  5      25    1.00    *                   vpgatherqq	%ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT:  5      20    2.00    *                   vpgatherdd	%xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT:  5      22    4.00    *                   vpgatherdd	%ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT:  5      18    1.00    *                   vpgatherdq	%xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT:  5      20    2.00    *                   vpgatherdq	%ymm0, (%rax,%xmm1,2), %ymm2
+# CHECK-NEXT:  5      18    1.00    *                   vpgatherqd	%xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT:  5      20    2.00    *                   vpgatherqd	%xmm0, (%rax,%ymm1,2), %xmm2
+# CHECK-NEXT:  5      18    1.00    *                   vpgatherqq	%xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT:  5      20    2.00    *                   vpgatherqq	%ymm0, (%rax,%ymm1,2), %ymm2
 # CHECK-NEXT:  3      3     2.00                        vphaddd	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  4      10    2.00    *                   vphaddd	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  3      3     2.00                        vphaddsw	%ymm0, %ymm1, %ymm2
@@ -776,7 +776,7 @@ vpxor           (%rax), %ymm1, %ymm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -      -     110.33 89.33  85.67  85.67  1.00   164.33  -     1.67
+# CHECK-NEXT:  -      -     110.33 89.33  99.67  99.67  1.00   164.33  -     1.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -786,13 +786,13 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vextracti128	$1, %ymm0, %xmm2
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   vextracti128	$1, %ymm0, (%rax)
 # CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vgatherdpd	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vgatherdpd	%ymm0, (%rax,%xmm1,2), %ymm2
-# CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vgatherdps	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vgatherdps	%ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT:  -      -     1.33   0.33   2.00   2.00    -     1.33    -      -     vgatherdpd	%ymm0, (%rax,%xmm1,2), %ymm2
+# CHECK-NEXT:  -      -     1.33   0.33   2.00   2.00    -     1.33    -      -     vgatherdps	%xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT:  -      -     1.33   0.33   4.00   4.00    -     1.33    -      -     vgatherdps	%ymm0, (%rax,%ymm1,2), %ymm2
 # CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vgatherqpd	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vgatherqpd	%ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT:  -      -     1.33   0.33   2.00   2.00    -     1.33    -      -     vgatherqpd	%ymm0, (%rax,%ymm1,2), %ymm2
 # CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vgatherqps	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vgatherqps	%xmm0, (%rax,%ymm1,2), %xmm2
+# CHECK-NEXT:  -      -     1.33   0.33   2.00   2.00    -     1.33    -      -     vgatherqps	%xmm0, (%rax,%ymm1,2), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vinserti128	$1, %xmm0, %ymm1, %ymm2
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vinserti128	$1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -      -      -      -     vmovntdqa	(%rax), %ymm0
@@ -888,14 +888,14 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpermps	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpermq	$1, %ymm0, %ymm2
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpermq	$1, (%rax), %ymm2
-# CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vpgatherdd	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vpgatherdd	%ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT:  -      -     1.33   0.33   2.00   2.00    -     1.33    -      -     vpgatherdd	%xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT:  -      -     1.33   0.33   4.00   4.00    -     1.33    -      -     vpgatherdd	%ymm0, (%rax,%ymm1,2), %ymm2
 # CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vpgatherdq	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vpgatherdq	%ymm0, (%rax,%xmm1,2), %ymm2
+# CHECK-NEXT:  -      -     1.33   0.33   2.00   2.00    -     1.33    -      -     vpgatherdq	%ymm0, (%rax,%xmm1,2), %ymm2
 # CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vpgatherqd	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vpgatherqd	%xmm0, (%rax,%ymm1,2), %xmm2
+# CHECK-NEXT:  -      -     1.33   0.33   2.00   2.00    -     1.33    -      -     vpgatherqd	%xmm0, (%rax,%ymm1,2), %xmm2
 # CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vpgatherqq	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vpgatherqq	%ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT:  -      -     1.33   0.33   2.00   2.00    -     1.33    -      -     vpgatherqq	%ymm0, (%rax,%ymm1,2), %ymm2
 # CHECK-NEXT:  -      -     0.33   0.33    -      -      -     2.33    -      -     vphaddd	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     2.33    -      -     vphaddd	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     2.00    -      -     vphaddsw	%ymm0, %ymm1, %ymm2

diff  --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s
index 7d5257c2699e..f9c51a18ff7d 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s
@@ -465,14 +465,14 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      3     1.00                        vbroadcastss	%xmm0, %ymm0
 # CHECK-NEXT:  1      3     1.00                        vextracti128	$1, %ymm0, %xmm2
 # CHECK-NEXT:  2      1     1.00           *            vextracti128	$1, %ymm0, (%rax)
-# CHECK-NEXT:  5      22    1.00    *                   vgatherdpd	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  5      25    1.00    *                   vgatherdpd	%ymm0, (%rax,%xmm1,2), %ymm2
-# CHECK-NEXT:  5      22    1.00    *                   vgatherdps	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  5      25    1.00    *                   vgatherdps	%ymm0, (%rax,%ymm1,2), %ymm2
-# CHECK-NEXT:  5      22    1.00    *                   vgatherqpd	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  5      25    1.00    *                   vgatherqpd	%ymm0, (%rax,%ymm1,2), %ymm2
-# CHECK-NEXT:  5      22    1.00    *                   vgatherqps	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  5      25    1.00    *                   vgatherqps	%xmm0, (%rax,%ymm1,2), %xmm2
+# CHECK-NEXT:  5      18    1.00    *                   vgatherdpd	%xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT:  5      20    2.00    *                   vgatherdpd	%ymm0, (%rax,%xmm1,2), %ymm2
+# CHECK-NEXT:  5      20    2.00    *                   vgatherdps	%xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT:  5      22    4.00    *                   vgatherdps	%ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT:  5      18    1.00    *                   vgatherqpd	%xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT:  5      20    2.00    *                   vgatherqpd	%ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT:  5      18    1.00    *                   vgatherqps	%xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT:  5      20    2.00    *                   vgatherqps	%xmm0, (%rax,%ymm1,2), %xmm2
 # CHECK-NEXT:  1      3     1.00                        vinserti128	$1, %xmm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      7     0.50    *                   vinserti128	$1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      7     0.50    *                   vmovntdqa	(%rax), %ymm0
@@ -568,14 +568,14 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      10    1.00    *                   vpermps	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      3     1.00                        vpermq	$1, %ymm0, %ymm2
 # CHECK-NEXT:  2      10    1.00    *                   vpermq	$1, (%rax), %ymm2
-# CHECK-NEXT:  5      22    1.00    *                   vpgatherdd	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  5      25    1.00    *                   vpgatherdd	%ymm0, (%rax,%ymm1,2), %ymm2
-# CHECK-NEXT:  5      22    1.00    *                   vpgatherdq	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  5      25    1.00    *                   vpgatherdq	%ymm0, (%rax,%xmm1,2), %ymm2
-# CHECK-NEXT:  5      22    1.00    *                   vpgatherqd	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  5      25    1.00    *                   vpgatherqd	%xmm0, (%rax,%ymm1,2), %xmm2
-# CHECK-NEXT:  5      22    1.00    *                   vpgatherqq	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  5      25    1.00    *                   vpgatherqq	%ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT:  5      20    2.00    *                   vpgatherdd	%xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT:  5      22    4.00    *                   vpgatherdd	%ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT:  5      18    1.00    *                   vpgatherdq	%xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT:  5      20    2.00    *                   vpgatherdq	%ymm0, (%rax,%xmm1,2), %ymm2
+# CHECK-NEXT:  5      18    1.00    *                   vpgatherqd	%xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT:  5      20    2.00    *                   vpgatherqd	%xmm0, (%rax,%ymm1,2), %xmm2
+# CHECK-NEXT:  5      18    1.00    *                   vpgatherqq	%xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT:  5      20    2.00    *                   vpgatherqq	%ymm0, (%rax,%ymm1,2), %ymm2
 # CHECK-NEXT:  3      3     2.00                        vphaddd	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  4      10    2.00    *                   vphaddd	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  3      3     2.00                        vphaddsw	%ymm0, %ymm1, %ymm2
@@ -776,7 +776,7 @@ vpxor           (%rax), %ymm1, %ymm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -      -     110.33 89.33  85.67  85.67  1.00   164.33  -     1.67
+# CHECK-NEXT:  -      -     110.33 89.33  99.67  99.67  1.00   164.33  -     1.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -786,13 +786,13 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vextracti128	$1, %ymm0, %xmm2
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   vextracti128	$1, %ymm0, (%rax)
 # CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vgatherdpd	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vgatherdpd	%ymm0, (%rax,%xmm1,2), %ymm2
-# CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vgatherdps	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vgatherdps	%ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT:  -      -     1.33   0.33   2.00   2.00    -     1.33    -      -     vgatherdpd	%ymm0, (%rax,%xmm1,2), %ymm2
+# CHECK-NEXT:  -      -     1.33   0.33   2.00   2.00    -     1.33    -      -     vgatherdps	%xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT:  -      -     1.33   0.33   4.00   4.00    -     1.33    -      -     vgatherdps	%ymm0, (%rax,%ymm1,2), %ymm2
 # CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vgatherqpd	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vgatherqpd	%ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT:  -      -     1.33   0.33   2.00   2.00    -     1.33    -      -     vgatherqpd	%ymm0, (%rax,%ymm1,2), %ymm2
 # CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vgatherqps	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vgatherqps	%xmm0, (%rax,%ymm1,2), %xmm2
+# CHECK-NEXT:  -      -     1.33   0.33   2.00   2.00    -     1.33    -      -     vgatherqps	%xmm0, (%rax,%ymm1,2), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vinserti128	$1, %xmm0, %ymm1, %ymm2
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vinserti128	$1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -      -      -      -     vmovntdqa	(%rax), %ymm0
@@ -888,14 +888,14 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpermps	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpermq	$1, %ymm0, %ymm2
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpermq	$1, (%rax), %ymm2
-# CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vpgatherdd	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vpgatherdd	%ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT:  -      -     1.33   0.33   2.00   2.00    -     1.33    -      -     vpgatherdd	%xmm0, (%rax,%xmm1,2), %xmm2
+# CHECK-NEXT:  -      -     1.33   0.33   4.00   4.00    -     1.33    -      -     vpgatherdd	%ymm0, (%rax,%ymm1,2), %ymm2
 # CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vpgatherdq	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vpgatherdq	%ymm0, (%rax,%xmm1,2), %ymm2
+# CHECK-NEXT:  -      -     1.33   0.33   2.00   2.00    -     1.33    -      -     vpgatherdq	%ymm0, (%rax,%xmm1,2), %ymm2
 # CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vpgatherqd	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vpgatherqd	%xmm0, (%rax,%ymm1,2), %xmm2
+# CHECK-NEXT:  -      -     1.33   0.33   2.00   2.00    -     1.33    -      -     vpgatherqd	%xmm0, (%rax,%ymm1,2), %xmm2
 # CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vpgatherqq	%xmm0, (%rax,%xmm1,2), %xmm2
-# CHECK-NEXT:  -      -     1.33   0.33   1.00   1.00    -     1.33    -      -     vpgatherqq	%ymm0, (%rax,%ymm1,2), %ymm2
+# CHECK-NEXT:  -      -     1.33   0.33   2.00   2.00    -     1.33    -      -     vpgatherqq	%ymm0, (%rax,%ymm1,2), %ymm2
 # CHECK-NEXT:  -      -     0.33   0.33    -      -      -     2.33    -      -     vphaddd	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     2.33    -      -     vphaddd	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -     2.00    -      -     vphaddsw	%ymm0, %ymm1, %ymm2

diff  --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
index 957fa6677c1a..e99030b7d439 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
@@ -81,6 +81,11 @@ vdivps            %zmm16, %zmm17, %zmm19 {z}{k1}
 vdivps            (%rax), %zmm17, %zmm19 {z}{k1}
 vdivps            (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
+vgatherdpd        (%rax,%ymm1,2), %zmm2 {k1}
+vgatherdps        (%rax,%zmm1,2), %zmm2 {k1}
+vgatherqpd        (%rax,%zmm1,2), %zmm2 {k1}
+vgatherqps        (%rax,%zmm1,2), %ymm2 {k1}
+
 vmaxpd            %zmm16, %zmm17, %zmm19
 vmaxpd            (%rax), %zmm17, %zmm19
 vmaxpd            (%rax){1to8}, %zmm17, %zmm19
@@ -181,6 +186,11 @@ vpaddq            %zmm16, %zmm17, %zmm19 {z}{k1}
 vpaddq            (%rax), %zmm17, %zmm19 {z}{k1}
 vpaddq            (%rax){1to8}, %zmm17, %zmm19 {z}{k1}
 
+vpgatherdq        (%rax,%ymm1,2), %zmm2 {k1}
+vpgatherdd        (%rax,%zmm1,2), %zmm2 {k1}
+vpgatherqq        (%rax,%zmm1,2), %zmm2 {k1}
+vpgatherqd        (%rax,%zmm1,2), %ymm2 {k1}
+
 vpmulld           %zmm16, %zmm17, %zmm19
 vpmulld           (%rax), %zmm17, %zmm19
 vpmulld           (%rax){1to16}, %zmm17, %zmm19
@@ -596,6 +606,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  3      18    10.00                       vdivps	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  4      25    10.00   *                   vdivps	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  4      25    10.00   *                   vdivps	(%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  5      21    4.00    *                   vgatherdpd	(%rax,%ymm1,2), %zmm2 {%k1}
+# CHECK-NEXT:  5      25    8.00    *                   vgatherdps	(%rax,%zmm1,2), %zmm2 {%k1}
+# CHECK-NEXT:  5      21    4.00    *                   vgatherqpd	(%rax,%zmm1,2), %zmm2 {%k1}
+# CHECK-NEXT:  5      21    4.00    *                   vgatherqps	(%rax,%zmm1,2), %ymm2 {%k1}
 # CHECK-NEXT:  1      4     0.50                        vmaxpd	%zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  2      11    0.50    *                   vmaxpd	(%rax), %zmm17, %zmm19
 # CHECK-NEXT:  2      11    0.50    *                   vmaxpd	(%rax){1to8}, %zmm17, %zmm19
@@ -686,6 +700,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  1      1     0.33                        vpaddq	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     0.50    *                   vpaddq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     0.50    *                   vpaddq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  5      21    4.00    *                   vpgatherdq	(%rax,%ymm1,2), %zmm2 {%k1}
+# CHECK-NEXT:  5      25    8.00    *                   vpgatherdd	(%rax,%zmm1,2), %zmm2 {%k1}
+# CHECK-NEXT:  5      21    4.00    *                   vpgatherqq	(%rax,%zmm1,2), %zmm2 {%k1}
+# CHECK-NEXT:  5      21    4.00    *                   vpgatherqd	(%rax,%zmm1,2), %ymm2 {%k1}
 # CHECK-NEXT:  2      10    1.00                        vpmulld	%zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  3      17    1.00    *                   vpmulld	(%rax), %zmm17, %zmm19
 # CHECK-NEXT:  3      17    1.00    *                   vpmulld	(%rax){1to16}, %zmm17, %zmm19
@@ -1001,7 +1019,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     612.00 188.00 26.00  153.00 153.00  -     330.00  -      -
+# CHECK-NEXT:  -     612.00 200.67 30.67  193.00 193.00  -     334.67 2.00    -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -1077,6 +1095,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -     10.00  2.00    -      -      -      -     1.00    -      -     vdivps	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -     10.00  2.00    -     0.50   0.50    -     1.00    -      -     vdivps	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -     10.00  2.00    -     0.50   0.50    -     1.00    -      -     vdivps	(%rax){1to16}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.58   0.58   4.00   4.00    -     0.58   0.25    -     vgatherdpd	(%rax,%ymm1,2), %zmm2 {%k1}
+# CHECK-NEXT:  -      -     1.58   0.58   8.00   8.00    -     0.58   0.25    -     vgatherdps	(%rax,%zmm1,2), %zmm2 {%k1}
+# CHECK-NEXT:  -      -     1.58   0.58   4.00   4.00    -     0.58   0.25    -     vgatherqpd	(%rax,%zmm1,2), %zmm2 {%k1}
+# CHECK-NEXT:  -      -     1.58   0.58   4.00   4.00    -     0.58   0.25    -     vgatherqps	(%rax,%zmm1,2), %ymm2 {%k1}
 # CHECK-NEXT:  -      -     0.50    -      -      -      -     0.50    -      -     vmaxpd	%zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -     0.50    -     0.50   0.50    -     0.50    -      -     vmaxpd	(%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -     0.50    -     0.50   0.50    -     0.50    -      -     vmaxpd	(%rax){1to8}, %zmm17, %zmm19
@@ -1167,6 +1189,10 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -     0.33   0.33    -      -      -     0.33    -      -     vpaddq	%zmm16, %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vpaddq	(%rax), %zmm17, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vpaddq	(%rax){1to8}, %zmm17, %zmm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.58   0.58   4.00   4.00    -     0.58   0.25    -     vpgatherdq	(%rax,%ymm1,2), %zmm2 {%k1}
+# CHECK-NEXT:  -      -     1.58   0.58   8.00   8.00    -     0.58   0.25    -     vpgatherdd	(%rax,%zmm1,2), %zmm2 {%k1}
+# CHECK-NEXT:  -      -     1.58   0.58   4.00   4.00    -     0.58   0.25    -     vpgatherqq	(%rax,%zmm1,2), %zmm2 {%k1}
+# CHECK-NEXT:  -      -     1.58   0.58   4.00   4.00    -     0.58   0.25    -     vpgatherqd	(%rax,%zmm1,2), %ymm2 {%k1}
 # CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     vpmulld	%zmm16, %zmm17, %zmm19
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -     1.00    -      -     vpmulld	(%rax), %zmm17, %zmm19
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -     1.00    -      -     vpmulld	(%rax){1to16}, %zmm17, %zmm19

diff  --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s
index e96d501c243d..4b8db4f0f4d4 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s
@@ -121,6 +121,16 @@ vdivps            %ymm16, %ymm17, %ymm19 {z}{k1}
 vdivps            (%rax), %ymm17, %ymm19 {z}{k1}
 vdivps            (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
+vgatherdpd        (%rax,%xmm1,2), %ymm2 {k1}
+vgatherdps        (%rax,%ymm1,2), %ymm2 {k1}
+vgatherqpd        (%rax,%ymm1,2), %ymm2 {k1}
+vgatherqps        (%rax,%ymm1,2), %xmm2 {k1}
+
+vgatherdpd        (%rax,%xmm1,2), %xmm2 {k1}
+vgatherdps        (%rax,%xmm1,2), %xmm2 {k1}
+vgatherqpd        (%rax,%xmm1,2), %xmm2 {k1}
+vgatherqps        (%rax,%xmm1,2), %xmm2 {k1}
+
 vmaxpd            %xmm16, %xmm17, %xmm19
 vmaxpd            (%rax), %xmm17, %xmm19
 vmaxpd            (%rax){1to2}, %xmm17, %xmm19
@@ -421,6 +431,16 @@ vpermq            %ymm16, %ymm17, %ymm19 {z}{k1}
 vpermq            (%rax), %ymm17, %ymm19 {z}{k1}
 vpermq            (%rax){1to4}, %ymm17, %ymm19 {z}{k1}
 
+vpgatherdq        (%rax,%xmm1,2), %ymm2 {k1}
+vpgatherdd        (%rax,%ymm1,2), %ymm2 {k1}
+vpgatherqq        (%rax,%ymm1,2), %ymm2 {k1}
+vpgatherqd        (%rax,%ymm1,2), %xmm2 {k1}
+
+vpgatherdq        (%rax,%xmm1,2), %xmm2 {k1}
+vpgatherdd        (%rax,%xmm1,2), %xmm2 {k1}
+vpgatherqq        (%rax,%xmm1,2), %xmm2 {k1}
+vpgatherqd        (%rax,%xmm1,2), %xmm2 {k1}
+
 vpmulld           %xmm16, %xmm17, %xmm19
 vpmulld           (%rax), %xmm17, %xmm19
 vpmulld           (%rax){1to4}, %xmm17, %xmm19
@@ -858,6 +878,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      11    5.00                        vdivps	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      18    5.00    *                   vdivps	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      18    5.00    *                   vdivps	(%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  5      19    2.00    *                   vgatherdpd	(%rax,%xmm1,2), %ymm2 {%k1}
+# CHECK-NEXT:  5      21    4.00    *                   vgatherdps	(%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT:  5      19    2.00    *                   vgatherqpd	(%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT:  5      19    2.00    *                   vgatherqps	(%rax,%ymm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  5      17    1.00    *                   vgatherdpd	(%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  5      19    2.00    *                   vgatherdps	(%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  5      17    1.00    *                   vgatherqpd	(%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  5      17    1.00    *                   vgatherqps	(%rax,%xmm1,2), %xmm2 {%k1}
 # CHECK-NEXT:  1      4     0.50                        vmaxpd	%xmm16, %xmm17, %xmm19
 # CHECK-NEXT:  2      10    0.50    *                   vmaxpd	(%rax), %xmm17, %xmm19
 # CHECK-NEXT:  2      10    0.50    *                   vmaxpd	(%rax){1to2}, %xmm17, %xmm19
@@ -1128,6 +1156,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  1      3     1.00                        vpermq	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      10    1.00    *                   vpermq	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  2      10    1.00    *                   vpermq	(%rax){1to4}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  5      19    2.00    *                   vpgatherdq	(%rax,%xmm1,2), %ymm2 {%k1}
+# CHECK-NEXT:  5      21    4.00    *                   vpgatherdd	(%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT:  5      19    2.00    *                   vpgatherqq	(%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT:  5      19    2.00    *                   vpgatherqd	(%rax,%ymm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  5      17    1.00    *                   vpgatherdq	(%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  5      19    2.00    *                   vpgatherdd	(%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  5      17    1.00    *                   vpgatherqq	(%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  5      17    1.00    *                   vpgatherqd	(%rax,%xmm1,2), %xmm2 {%k1}
 # CHECK-NEXT:  2      10    1.00                        vpmulld	%xmm16, %xmm17, %xmm19
 # CHECK-NEXT:  3      16    1.00    *                   vpmulld	(%rax), %xmm17, %xmm19
 # CHECK-NEXT:  3      16    1.00    *                   vpmulld	(%rax){1to4}, %xmm17, %xmm19
@@ -1431,7 +1467,7 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     423.00 217.00 145.00 222.00 222.00  -     328.00  -      -
+# CHECK-NEXT:  -     423.00 242.33 154.33 252.00 252.00  -     337.33 4.00    -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -1543,6 +1579,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -     5.00   1.00    -      -      -      -      -      -      -     vdivps	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -     5.00   1.00    -     0.50   0.50    -      -      -      -     vdivps	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -     5.00   1.00    -     0.50   0.50    -      -      -      -     vdivps	(%rax){1to8}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.58   0.58   2.00   2.00    -     0.58   0.25    -     vgatherdpd	(%rax,%xmm1,2), %ymm2 {%k1}
+# CHECK-NEXT:  -      -     1.58   0.58   4.00   4.00    -     0.58   0.25    -     vgatherdps	(%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT:  -      -     1.58   0.58   2.00   2.00    -     0.58   0.25    -     vgatherqpd	(%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT:  -      -     1.58   0.58   2.00   2.00    -     0.58   0.25    -     vgatherqps	(%rax,%ymm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  -      -     1.58   0.58   1.00   1.00    -     0.58   0.25    -     vgatherdpd	(%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  -      -     1.58   0.58   2.00   2.00    -     0.58   0.25    -     vgatherdps	(%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  -      -     1.58   0.58   1.00   1.00    -     0.58   0.25    -     vgatherqpd	(%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  -      -     1.58   0.58   1.00   1.00    -     0.58   0.25    -     vgatherqps	(%rax,%xmm1,2), %xmm2 {%k1}
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     vmaxpd	%xmm16, %xmm17, %xmm19
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     vmaxpd	(%rax), %xmm17, %xmm19
 # CHECK-NEXT:  -      -     0.50   0.50   0.50   0.50    -      -      -      -     vmaxpd	(%rax){1to2}, %xmm17, %xmm19
@@ -1813,6 +1857,14 @@ vunpcklps         (%rax){1to8}, %ymm17, %ymm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpermq	%ymm16, %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpermq	(%rax), %ymm17, %ymm19 {%k1} {z}
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     vpermq	(%rax){1to4}, %ymm17, %ymm19 {%k1} {z}
+# CHECK-NEXT:  -      -     1.58   0.58   2.00   2.00    -     0.58   0.25    -     vpgatherdq	(%rax,%xmm1,2), %ymm2 {%k1}
+# CHECK-NEXT:  -      -     1.58   0.58   4.00   4.00    -     0.58   0.25    -     vpgatherdd	(%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT:  -      -     1.58   0.58   2.00   2.00    -     0.58   0.25    -     vpgatherqq	(%rax,%ymm1,2), %ymm2 {%k1}
+# CHECK-NEXT:  -      -     1.58   0.58   2.00   2.00    -     0.58   0.25    -     vpgatherqd	(%rax,%ymm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  -      -     1.58   0.58   1.00   1.00    -     0.58   0.25    -     vpgatherdq	(%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  -      -     1.58   0.58   2.00   2.00    -     0.58   0.25    -     vpgatherdd	(%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  -      -     1.58   0.58   1.00   1.00    -     0.58   0.25    -     vpgatherqq	(%rax,%xmm1,2), %xmm2 {%k1}
+# CHECK-NEXT:  -      -     1.58   0.58   1.00   1.00    -     0.58   0.25    -     vpgatherqd	(%rax,%xmm1,2), %xmm2 {%k1}
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -      -      -      -     vpmulld	%xmm16, %xmm17, %xmm19
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -      -      -      -     vpmulld	(%rax), %xmm17, %xmm19
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -      -      -      -     vpmulld	(%rax){1to4}, %xmm17, %xmm19


        


More information about the llvm-commits mailing list