[llvm] 746cf4f - [X86] Synchronise scheduler classes of VPERM2F128/VBROADCASTF128/VEXTRACTF128/VINSERTF128 with I128 equivalents
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 21 09:15:57 PST 2022
Author: Simon Pilgrim
Date: 2022-11-21T17:15:47Z
New Revision: 746cf4f13feaff97805e1daf8d600fa7287b3568
URL: https://github.com/llvm/llvm-project/commit/746cf4f13feaff97805e1daf8d600fa7287b3568
DIFF: https://github.com/llvm/llvm-project/commit/746cf4f13feaff97805e1daf8d600fa7287b3568.diff
LOG: [X86] Synchronise scheduler classes of VPERM2F128/VBROADCASTF128/VEXTRACTF128/VINSERTF128 with I128 equivalents
znver1/znver2 show barely any difference in behaviour between the AVX1 and AVX2 variants of these instructions - it looks like a copy+paste mistake left the AVX2 integer domain instructions out of the overrides.
Having said that, the override numbers don't appear to match the numbers in the AMD 17h Software Optimization Guides (SoGs) very well - for instance, vperm2f128/vperm2i128 might be microcoded in the AMD sense of >3 uops, but they don't have a 100cy latency. These will need to be addressed further.
Added:
Modified:
llvm/lib/Target/X86/X86ScheduleZnver1.td
llvm/lib/Target/X86/X86ScheduleZnver2.td
llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s
llvm/test/tools/llvm-mca/X86/Znver2/resources-avx2.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 76e3ec4292663..acc39410742e6 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -1055,16 +1055,19 @@ def : InstRW<[ZnWritePCMPGTQYm], (instrs VPCMPGTQYrm)>;
//=== Floating Point XMM and YMM Instructions ===//
//-- Move instructions --//
-// VPERM2F128.
-def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rr)>;
-def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rm)>;
+// VPERM2F128 / VPERM2I128.
+def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rr,
+ VPERM2I128rr)>;
+def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rm,
+ VPERM2I128rm)>;
def ZnWriteBROADCAST : SchedWriteRes<[ZnAGU, ZnFPU13]> {
let NumMicroOps = 2;
let Latency = 8;
}
-// VBROADCASTF128.
-def : InstRW<[ZnWriteBROADCAST], (instrs VBROADCASTF128)>;
+// VBROADCASTF128 / VBROADCASTI128.
+def : InstRW<[ZnWriteBROADCAST], (instrs VBROADCASTF128,
+ VBROADCASTI128)>;
// EXTRACTPS.
// r32,x,i.
@@ -1083,12 +1086,14 @@ def ZnWriteEXTRACTPSm : SchedWriteRes<[ZnAGU,ZnFPU12, ZnFPU2]> {
// m32,x,i.
def : InstRW<[ZnWriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>;
-// VEXTRACTF128.
+// VEXTRACTF128 / VEXTRACTI128.
// x,y,i.
-def : InstRW<[ZnWriteFPU013], (instrs VEXTRACTF128rr)>;
+def : InstRW<[ZnWriteFPU013], (instrs VEXTRACTF128rr,
+ VEXTRACTI128rr)>;
// m128,y,i.
-def : InstRW<[ZnWriteFPU013m], (instrs VEXTRACTF128mr)>;
+def : InstRW<[ZnWriteFPU013m], (instrs VEXTRACTF128mr,
+ VEXTRACTI128mr)>;
def ZnWriteVINSERT128r: SchedWriteRes<[ZnFPU013]> {
let Latency = 2;
@@ -1099,10 +1104,12 @@ def ZnWriteVINSERT128Ld: SchedWriteRes<[ZnAGU,ZnFPU013]> {
let NumMicroOps = 2;
let ResourceCycles = [1, 2];
}
-// VINSERTF128.
+// VINSERTF128 / VINSERTI128.
// y,y,x,i.
-def : InstRW<[ZnWriteVINSERT128r], (instrs VINSERTF128rr)>;
-def : InstRW<[ZnWriteVINSERT128Ld], (instrs VINSERTF128rm)>;
+def : InstRW<[ZnWriteVINSERT128r], (instrs VINSERTF128rr,
+ VINSERTI128rr)>;
+def : InstRW<[ZnWriteVINSERT128Ld], (instrs VINSERTF128rm,
+ VINSERTI128rm)>;
// VGATHER.
def : InstRW<[WriteMicrocoded], (instregex "VGATHER(Q|D)(PD|PS)(Y?)rm")>;
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td
index 69246a06a5cc7..a2caa046f0bfb 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver2.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td
@@ -1061,16 +1061,19 @@ def : InstRW<[Zn2WritePCMPGTQYm], (instrs VPCMPGTQYrm)>;
//=== Floating Point XMM and YMM Instructions ===//
//-- Move instructions --//
-// VPERM2F128.
-def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rr)>;
-def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rm)>;
+// VPERM2F128 / VPERM2I128.
+def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rr,
+ VPERM2I128rr)>;
+def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rm,
+ VPERM2I128rm)>;
def Zn2WriteBROADCAST : SchedWriteRes<[Zn2AGU, Zn2FPU13]> {
let NumMicroOps = 2;
let Latency = 8;
}
-// VBROADCASTF128.
-def : InstRW<[Zn2WriteBROADCAST], (instrs VBROADCASTF128)>;
+// VBROADCASTF128 / VBROADCASTI128.
+def : InstRW<[Zn2WriteBROADCAST], (instrs VBROADCASTF128,
+ VBROADCASTI128)>;
// EXTRACTPS.
// r32,x,i.
@@ -1089,12 +1092,14 @@ def Zn2WriteEXTRACTPSm : SchedWriteRes<[Zn2AGU,Zn2FPU12, Zn2FPU2]> {
// m32,x,i.
def : InstRW<[Zn2WriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>;
-// VEXTRACTF128.
+// VEXTRACTF128 / VEXTRACTI128.
// x,y,i.
-def : InstRW<[Zn2WriteFPU013], (instrs VEXTRACTF128rr)>;
+def : InstRW<[Zn2WriteFPU013], (instrs VEXTRACTF128rr,
+ VEXTRACTI128rr)>;
// m128,y,i.
-def : InstRW<[Zn2WriteFPU013m], (instrs VEXTRACTF128mr)>;
+def : InstRW<[Zn2WriteFPU013m], (instrs VEXTRACTF128mr,
+ VEXTRACTI128mr)>;
def Zn2WriteVINSERT128r: SchedWriteRes<[Zn2FPU013]> {
let Latency = 2;
@@ -1104,10 +1109,12 @@ def Zn2WriteVINSERT128Ld: SchedWriteRes<[Zn2AGU,Zn2FPU013]> {
let Latency = 9;
let NumMicroOps = 2;
}
-// VINSERTF128.
+// VINSERTF128 / VINSERTI128.
// y,y,x,i.
-def : InstRW<[Zn2WriteVINSERT128r], (instrs VINSERTF128rr)>;
-def : InstRW<[Zn2WriteVINSERT128Ld], (instrs VINSERTF128rm)>;
+def : InstRW<[Zn2WriteVINSERT128r], (instrs VINSERTF128rr,
+ VINSERTI128rr)>;
+def : InstRW<[Zn2WriteVINSERT128Ld], (instrs VINSERTF128rm,
+ VINSERTI128rm)>;
// VGATHER.
def : InstRW<[WriteMicrocoded], (instregex "VGATHER(Q|D)(PD|PS)(Y?)rm")>;
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s
index cf700c94252f2..cca125fd51583 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s
@@ -460,11 +460,11 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 8 0.50 * vbroadcasti128 (%rax), %ymm0
+# CHECK-NEXT: 2 8 0.50 * vbroadcasti128 (%rax), %ymm0
# CHECK-NEXT: 2 2 1.00 vbroadcastsd %xmm0, %ymm0
# CHECK-NEXT: 2 2 1.00 vbroadcastss %xmm0, %ymm0
-# CHECK-NEXT: 2 2 1.00 vextracti128 $1, %ymm0, %xmm2
-# CHECK-NEXT: 1 1 0.50 * vextracti128 $1, %ymm0, (%rax)
+# CHECK-NEXT: 1 1 0.33 vextracti128 $1, %ymm0, %xmm2
+# CHECK-NEXT: 2 8 0.50 * vextracti128 $1, %ymm0, (%rax)
# CHECK-NEXT: 1 100 0.25 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 1 100 0.25 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: 1 100 0.25 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
@@ -473,8 +473,8 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 1 100 0.25 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 1 100 0.25 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
-# CHECK-NEXT: 2 2 1.00 vinserti128 $1, %xmm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 9 1.00 * vinserti128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 2 0.67 vinserti128 $1, %xmm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 9 0.67 * vinserti128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 8 0.50 * vmovntdqa (%rax), %ymm0
# CHECK-NEXT: 1 100 0.25 vmpsadbw $1, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 * vmpsadbw $1, (%rax), %ymm1, %ymm2
@@ -558,8 +558,8 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 8 1.00 * vpcmpgtq (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 1 0.67 vpcmpgtw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 8 0.67 * vpcmpgtw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 2 2 1.00 vperm2i128 $1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 9 1.00 * vperm2i128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 100 0.25 vperm2i128 $1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 100 0.25 * vperm2i128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 2 1.00 vpermd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 9 1.00 * vpermd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 2 1.00 vpermpd $1, %ymm0, %ymm2
@@ -778,15 +778,15 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: 67.00 67.00 - - - - - 117.50 241.50 163.50 63.50 -
+# CHECK-NEXT: 66.50 66.50 - - - - - 119.50 238.50 158.00 66.00 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
-# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vbroadcasti128 (%rax), %ymm0
+# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 - 0.50 - vbroadcasti128 (%rax), %ymm0
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vbroadcastsd %xmm0, %ymm0
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vbroadcastss %xmm0, %ymm0
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vextracti128 $1, %ymm0, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - vextracti128 $1, %ymm0, (%rax)
+# CHECK-NEXT: - - - - - - - 0.33 0.33 - 0.33 - vextracti128 $1, %ymm0, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - 0.33 0.33 - 0.33 - vextracti128 $1, %ymm0, (%rax)
# CHECK-NEXT: - - - - - - - - - - - - vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: - - - - - - - - - - - - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
@@ -795,8 +795,8 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - - - - - - - - - - - vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vinserti128 $1, %xmm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vinserti128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - - - 0.67 0.67 - 0.67 - vinserti128 $1, %xmm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - 0.67 0.67 - 0.67 - vinserti128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - vmovntdqa (%rax), %ymm0
# CHECK-NEXT: - - - - - - - - - - - - vmpsadbw $1, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - vmpsadbw $1, (%rax), %ymm1, %ymm2
@@ -880,8 +880,8 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - 1.00 - vpcmpgtq (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 0.67 0.67 - 0.67 - vpcmpgtw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.67 0.67 - 0.67 - vpcmpgtw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vperm2i128 $1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vperm2i128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - - - - - - - - vperm2i128 $1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - - - - - - - - - - vperm2i128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vpermd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vpermd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vpermpd $1, %ymm0, %ymm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx2.s
index 6909ccbf42446..c94ea07befb61 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx2.s
@@ -460,11 +460,11 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 8 0.50 * vbroadcasti128 (%rax), %ymm0
+# CHECK-NEXT: 2 8 0.50 * vbroadcasti128 (%rax), %ymm0
# CHECK-NEXT: 1 2 0.50 vbroadcastsd %xmm0, %ymm0
# CHECK-NEXT: 1 2 0.50 vbroadcastss %xmm0, %ymm0
-# CHECK-NEXT: 1 2 0.50 vextracti128 $1, %ymm0, %xmm2
-# CHECK-NEXT: 1 1 0.33 * vextracti128 $1, %ymm0, (%rax)
+# CHECK-NEXT: 1 1 0.33 vextracti128 $1, %ymm0, %xmm2
+# CHECK-NEXT: 2 8 0.33 * vextracti128 $1, %ymm0, (%rax)
# CHECK-NEXT: 1 100 0.25 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 1 100 0.25 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: 1 100 0.25 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
@@ -473,8 +473,8 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 1 100 0.25 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 1 100 0.25 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
-# CHECK-NEXT: 1 2 0.50 vinserti128 $1, %xmm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 9 0.50 * vinserti128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 2 0.33 vinserti128 $1, %xmm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 9 0.33 * vinserti128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 8 0.33 * vmovntdqa (%rax), %ymm0
# CHECK-NEXT: 1 100 0.25 vmpsadbw $1, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 * vmpsadbw $1, (%rax), %ymm1, %ymm2
@@ -558,8 +558,8 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 8 0.50 * vpcmpgtq (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.33 vpcmpgtw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 0.33 * vpcmpgtw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 2 0.50 vperm2i128 $1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 9 0.50 * vperm2i128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 100 0.25 vperm2i128 $1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 1 100 0.25 * vperm2i128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 0.50 vpermd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 9 0.50 * vpermd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 0.50 vpermpd $1, %ymm0, %ymm2
@@ -779,15 +779,15 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12]
-# CHECK-NEXT: 44.67 44.67 44.67 - - - - - 61.83 126.83 74.00 32.33 -
+# CHECK-NEXT: 44.33 44.33 44.33 - - - - - 63.17 125.67 71.00 34.17 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
-# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 - - vbroadcasti128 (%rax), %ymm0
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 - 0.50 - vbroadcasti128 (%rax), %ymm0
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vbroadcastsd %xmm0, %ymm0
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vbroadcastss %xmm0, %ymm0
-# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vextracti128 $1, %ymm0, %xmm2
-# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vextracti128 $1, %ymm0, (%rax)
+# CHECK-NEXT: - - - - - - - - 0.33 0.33 - 0.33 - vextracti128 $1, %ymm0, %xmm2
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.33 0.33 - 0.33 - vextracti128 $1, %ymm0, (%rax)
# CHECK-NEXT: - - - - - - - - - - - - - vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
@@ -796,8 +796,8 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
-# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vinserti128 $1, %xmm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 - - vinserti128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - - - - 0.33 0.33 - 0.33 - vinserti128 $1, %xmm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.33 0.33 - 0.33 - vinserti128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vmovntdqa (%rax), %ymm0
# CHECK-NEXT: - - - - - - - - - - - - - vmpsadbw $1, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - - vmpsadbw $1, (%rax), %ymm1, %ymm2
@@ -881,8 +881,8 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 - - 0.50 - vpcmpgtq (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - 0.33 0.33 - 0.33 - vpcmpgtw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.33 0.33 - 0.33 - vpcmpgtw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vperm2i128 $1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 - - vperm2i128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - - - - - - - - - vperm2i128 $1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - - - - - - - - - - - vperm2i128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vpermd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 - - vpermd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vpermpd $1, %ymm0, %ymm2
More information about the llvm-commits
mailing list