[llvm] 54aeaa2 - [X86] Ensure 256-bit sqrt + crosslane shuffles are set to 2 uops + half rate
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 30 07:25:56 PDT 2022
Author: Simon Pilgrim
Date: 2022-10-30T14:23:56Z
New Revision: 54aeaa2a8bae6dd9dbdae8e06fab7b29baf684bc
URL: https://github.com/llvm/llvm-project/commit/54aeaa2a8bae6dd9dbdae8e06fab7b29baf684bc
DIFF: https://github.com/llvm/llvm-project/commit/54aeaa2a8bae6dd9dbdae8e06fab7b29baf684bc.diff
LOG: [X86] Ensure 256-bit sqrt + crosslane shuffles are set to 2 uops + half rate
Fixes another mismatch between the D103695 script and the znver1 scheduler model.
Confirmed against the AMD Software Optimization Guide (SoG), Agner Fog's instruction tables, and instlatx64.
Added:
Modified:
llvm/lib/Target/X86/X86ScheduleZnver1.td
llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 01deab36d930..a7b4d6f98315 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -365,15 +365,15 @@ defm : ZnWriteResFpuPair<WriteFRsqrtY, [ZnFPU01], 5, [2], 2>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : ZnWriteResFpuPair<WriteFSqrt, [ZnFPU3], 14, [5]>;
defm : ZnWriteResFpuPair<WriteFSqrtX, [ZnFPU3], 14, [5]>;
-defm : ZnWriteResFpuPair<WriteFSqrtY, [ZnFPU3], 14, [10], 1>;
+defm : ZnWriteResFpuPair<WriteFSqrtY, [ZnFPU3], 14, [10], 2>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
defm : ZnWriteResFpuPair<WriteFSqrt64, [ZnFPU3], 20, [8]>;
defm : ZnWriteResFpuPair<WriteFSqrt64X, [ZnFPU3], 20, [8]>;
-defm : ZnWriteResFpuPair<WriteFSqrt64Y, [ZnFPU3], 20, [16], 1>;
+defm : ZnWriteResFpuPair<WriteFSqrt64Y, [ZnFPU3], 20, [16], 2>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : ZnWriteResFpuPair<WriteFSqrt80, [ZnFPU3], 20, [20]>;
-defm : ZnWriteResFpuPair<WriteFShuffle256, [ZnFPU], 2>;
-defm : ZnWriteResFpuPair<WriteFVarShuffle256, [ZnFPU], 2>;
+defm : ZnWriteResFpuPair<WriteFShuffle256, [ZnFPU], 2, [2], 2>;
+defm : ZnWriteResFpuPair<WriteFVarShuffle256, [ZnFPU], 2, [2], 2>;
// Vector integer operations which uses FPU units
defm : X86WriteRes<WriteVecLoad, [ZnAGU], 8, [1], 1>;
@@ -440,9 +440,9 @@ defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : ZnWriteResFpuPair<WriteBlend, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteBlendY, [ZnFPU01], 1, [2], 2>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
-defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU], 2>;
+defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU], 2, [2], 2>;
defm : ZnWriteResFpuPair<WriteVPMOV256, [ZnFPU12], 1, [4], 3>;
-defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU], 2>;
+defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU], 2, [2], 2>;
defm : ZnWriteResFpuPair<WritePSADBW, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WritePSADBWX, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WritePSADBWY, [ZnFPU0], 3, [2], 2>;
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
index 421cbbf993e1..544f0710c238 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
@@ -1660,12 +1660,12 @@ vzeroupper
# CHECK-NEXT: 2 8 1.00 * vshufps $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 20 8.00 vsqrtpd %xmm0, %xmm2
# CHECK-NEXT: 1 27 8.00 * vsqrtpd (%rax), %xmm2
-# CHECK-NEXT: 1 20 16.00 vsqrtpd %ymm0, %ymm2
-# CHECK-NEXT: 1 27 16.00 * vsqrtpd (%rax), %ymm2
+# CHECK-NEXT: 2 20 16.00 vsqrtpd %ymm0, %ymm2
+# CHECK-NEXT: 2 27 16.00 * vsqrtpd (%rax), %ymm2
# CHECK-NEXT: 1 14 5.00 vsqrtps %xmm0, %xmm2
# CHECK-NEXT: 1 21 5.00 * vsqrtps (%rax), %xmm2
-# CHECK-NEXT: 1 14 10.00 vsqrtps %ymm0, %ymm2
-# CHECK-NEXT: 1 21 10.00 * vsqrtps (%rax), %ymm2
+# CHECK-NEXT: 2 14 10.00 vsqrtps %ymm0, %ymm2
+# CHECK-NEXT: 2 21 10.00 * vsqrtps (%rax), %ymm2
# CHECK-NEXT: 1 20 8.00 vsqrtsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 27 8.00 * vsqrtsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 14 5.00 vsqrtss %xmm0, %xmm1, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s
index 179e6a4921f5..9f4b4b865e7a 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s
@@ -461,9 +461,9 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 8 0.50 * vbroadcasti128 (%rax), %ymm0
-# CHECK-NEXT: 1 2 0.25 vbroadcastsd %xmm0, %ymm0
-# CHECK-NEXT: 1 2 0.25 vbroadcastss %xmm0, %ymm0
-# CHECK-NEXT: 1 2 0.25 vextracti128 $1, %ymm0, %xmm2
+# CHECK-NEXT: 2 2 0.50 vbroadcastsd %xmm0, %ymm0
+# CHECK-NEXT: 2 2 0.50 vbroadcastss %xmm0, %ymm0
+# CHECK-NEXT: 2 2 0.50 vextracti128 $1, %ymm0, %xmm2
# CHECK-NEXT: 1 1 0.50 * vextracti128 $1, %ymm0, (%rax)
# CHECK-NEXT: 1 100 0.25 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 1 100 0.25 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
@@ -473,8 +473,8 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 1 100 0.25 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 1 100 0.25 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
-# CHECK-NEXT: 1 2 0.25 vinserti128 $1, %xmm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 9 0.50 * vinserti128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 2 0.50 vinserti128 $1, %xmm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 9 0.50 * vinserti128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 8 0.50 * vmovntdqa (%rax), %ymm0
# CHECK-NEXT: 1 100 0.25 vmpsadbw $1, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 100 0.25 * vmpsadbw $1, (%rax), %ymm1, %ymm2
@@ -528,19 +528,19 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 9 0.50 * vpblendw $11, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.25 vpbroadcastb %xmm0, %xmm0
# CHECK-NEXT: 2 8 1.00 * vpbroadcastb (%rax), %xmm0
-# CHECK-NEXT: 1 2 0.25 vpbroadcastb %xmm0, %ymm0
+# CHECK-NEXT: 2 2 0.50 vpbroadcastb %xmm0, %ymm0
# CHECK-NEXT: 2 8 2.00 * vpbroadcastb (%rax), %ymm0
# CHECK-NEXT: 1 1 0.25 vpbroadcastd %xmm0, %xmm0
# CHECK-NEXT: 1 8 0.50 * vpbroadcastd (%rax), %xmm0
-# CHECK-NEXT: 1 2 0.25 vpbroadcastd %xmm0, %ymm0
+# CHECK-NEXT: 2 2 0.50 vpbroadcastd %xmm0, %ymm0
# CHECK-NEXT: 1 8 0.50 * vpbroadcastd (%rax), %ymm0
# CHECK-NEXT: 1 1 0.25 vpbroadcastq %xmm0, %xmm0
# CHECK-NEXT: 1 8 0.50 * vpbroadcastq (%rax), %xmm0
-# CHECK-NEXT: 1 2 0.25 vpbroadcastq %xmm0, %ymm0
+# CHECK-NEXT: 2 2 0.50 vpbroadcastq %xmm0, %ymm0
# CHECK-NEXT: 1 8 0.50 * vpbroadcastq (%rax), %ymm0
# CHECK-NEXT: 1 1 0.25 vpbroadcastw %xmm0, %xmm0
# CHECK-NEXT: 2 8 1.00 * vpbroadcastw (%rax), %xmm0
-# CHECK-NEXT: 1 2 0.25 vpbroadcastw %xmm0, %ymm0
+# CHECK-NEXT: 2 2 0.50 vpbroadcastw %xmm0, %ymm0
# CHECK-NEXT: 2 8 2.00 * vpbroadcastw (%rax), %ymm0
# CHECK-NEXT: 2 1 0.50 vpcmpeqb %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 8 0.50 * vpcmpeqb (%rax), %ymm1, %ymm2
@@ -558,16 +558,16 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 8 1.00 * vpcmpgtq (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 1 0.50 vpcmpgtw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 8 0.50 * vpcmpgtw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 2 0.25 vperm2i128 $1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 9 0.50 * vperm2i128 $1, (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 2 0.25 vpermd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 9 0.50 * vpermd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 2 0.25 vpermpd $1, %ymm0, %ymm2
-# CHECK-NEXT: 1 9 0.50 * vpermpd $1, (%rax), %ymm2
-# CHECK-NEXT: 1 2 0.25 vpermps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 9 0.50 * vpermps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 2 0.25 vpermq $1, %ymm0, %ymm2
-# CHECK-NEXT: 1 9 0.50 * vpermq $1, (%rax), %ymm2
+# CHECK-NEXT: 2 2 0.50 vperm2i128 $1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 9 0.50 * vperm2i128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 2 0.50 vpermd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 9 0.50 * vpermd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 2 0.50 vpermpd $1, %ymm0, %ymm2
+# CHECK-NEXT: 2 9 0.50 * vpermpd $1, (%rax), %ymm2
+# CHECK-NEXT: 2 2 0.50 vpermps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 9 0.50 * vpermps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 2 0.50 vpermq $1, %ymm0, %ymm2
+# CHECK-NEXT: 2 9 0.50 * vpermq $1, (%rax), %ymm2
# CHECK-NEXT: 1 100 0.25 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 1 100 0.25 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 1 100 0.25 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
@@ -778,14 +778,14 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: 67.00 67.00 - - - - - 128.17 192.67 172.00 74.17 -
+# CHECK-NEXT: 67.00 67.00 - - - - - 132.92 197.42 176.75 78.92 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vbroadcasti128 (%rax), %ymm0
-# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vbroadcastsd %xmm0, %ymm0
-# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vbroadcastss %xmm0, %ymm0
-# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vextracti128 $1, %ymm0, %xmm2
+# CHECK-NEXT: - - - - - - - 0.50 0.50 0.50 0.50 - vbroadcastsd %xmm0, %ymm0
+# CHECK-NEXT: - - - - - - - 0.50 0.50 0.50 0.50 - vbroadcastss %xmm0, %ymm0
+# CHECK-NEXT: - - - - - - - 0.50 0.50 0.50 0.50 - vextracti128 $1, %ymm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - vextracti128 $1, %ymm0, (%rax)
# CHECK-NEXT: - - - - - - - - - - - - vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
@@ -795,8 +795,8 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - - - - - - - - - - - vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2
-# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vinserti128 $1, %xmm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vinserti128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - - - 0.50 0.50 0.50 0.50 - vinserti128 $1, %xmm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 0.50 0.50 - vinserti128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - vmovntdqa (%rax), %ymm0
# CHECK-NEXT: - - - - - - - - - - - - vmpsadbw $1, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - - - - vmpsadbw $1, (%rax), %ymm1, %ymm2
@@ -850,19 +850,19 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.33 0.33 - 0.33 - vpblendw $11, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpbroadcastb %xmm0, %xmm0
# CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vpbroadcastb (%rax), %xmm0
-# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpbroadcastb %xmm0, %ymm0
+# CHECK-NEXT: - - - - - - - 0.50 0.50 0.50 0.50 - vpbroadcastb %xmm0, %ymm0
# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 - - - vpbroadcastb (%rax), %ymm0
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpbroadcastd %xmm0, %xmm0
# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpbroadcastd (%rax), %xmm0
-# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpbroadcastd %xmm0, %ymm0
+# CHECK-NEXT: - - - - - - - 0.50 0.50 0.50 0.50 - vpbroadcastd %xmm0, %ymm0
# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpbroadcastd (%rax), %ymm0
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpbroadcastq %xmm0, %xmm0
# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpbroadcastq (%rax), %xmm0
-# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpbroadcastq %xmm0, %ymm0
+# CHECK-NEXT: - - - - - - - 0.50 0.50 0.50 0.50 - vpbroadcastq %xmm0, %ymm0
# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpbroadcastq (%rax), %ymm0
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpbroadcastw %xmm0, %xmm0
# CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vpbroadcastw (%rax), %xmm0
-# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpbroadcastw %xmm0, %ymm0
+# CHECK-NEXT: - - - - - - - 0.50 0.50 0.50 0.50 - vpbroadcastw %xmm0, %ymm0
# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 - - - vpbroadcastw (%rax), %ymm0
# CHECK-NEXT: - - - - - - - 0.50 0.50 0.50 0.50 - vpcmpeqb %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 0.50 0.50 - vpcmpeqb (%rax), %ymm1, %ymm2
@@ -880,16 +880,16 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - 1.00 - vpcmpgtq (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 0.50 0.50 0.50 0.50 - vpcmpgtw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 0.50 0.50 - vpcmpgtw (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vperm2i128 $1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vperm2i128 $1, (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpermd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpermd (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpermpd $1, %ymm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpermpd $1, (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpermps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpermps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpermq $1, %ymm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpermq $1, (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - 0.50 0.50 0.50 0.50 - vperm2i128 $1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 0.50 0.50 - vperm2i128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - - - 0.50 0.50 0.50 0.50 - vpermd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 0.50 0.50 - vpermd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - - - 0.50 0.50 0.50 0.50 - vpermpd $1, %ymm0, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 0.50 0.50 - vpermpd $1, (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - 0.50 0.50 0.50 0.50 - vpermps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 0.50 0.50 - vpermps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - - - - - 0.50 0.50 0.50 0.50 - vpermq $1, %ymm0, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 0.50 0.50 - vpermq $1, (%rax), %ymm2
# CHECK-NEXT: - - - - - - - - - - - - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: - - - - - - - - - - - - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
More information about the llvm-commits mailing list