[llvm] 54aeaa2 - [X86] Ensure 256-bit sqrt + crosslane shuffles are set to 2 uops + half rate

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sun Oct 30 07:25:56 PDT 2022


Author: Simon Pilgrim
Date: 2022-10-30T14:23:56Z
New Revision: 54aeaa2a8bae6dd9dbdae8e06fab7b29baf684bc

URL: https://github.com/llvm/llvm-project/commit/54aeaa2a8bae6dd9dbdae8e06fab7b29baf684bc
DIFF: https://github.com/llvm/llvm-project/commit/54aeaa2a8bae6dd9dbdae8e06fab7b29baf684bc.diff

LOG: [X86] Ensure 256-bit sqrt + crosslane shuffles are set to 2 uops + half rate

Fixes another mismatch between the D103695 script and the znver1 scheduler model.

Confirmed against the AMD Software Optimization Guide (SoG), Agner Fog's instruction tables, and instlatx64.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ScheduleZnver1.td
    llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
    llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 01deab36d930..a7b4d6f98315 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -365,15 +365,15 @@ defm : ZnWriteResFpuPair<WriteFRsqrtY,   [ZnFPU01], 5, [2], 2>;
 defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
 defm : ZnWriteResFpuPair<WriteFSqrt,     [ZnFPU3], 14, [5]>;
 defm : ZnWriteResFpuPair<WriteFSqrtX,    [ZnFPU3], 14, [5]>;
-defm : ZnWriteResFpuPair<WriteFSqrtY,    [ZnFPU3], 14, [10], 1>;
+defm : ZnWriteResFpuPair<WriteFSqrtY,    [ZnFPU3], 14, [10], 2>;
 defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
 defm : ZnWriteResFpuPair<WriteFSqrt64,   [ZnFPU3], 20, [8]>;
 defm : ZnWriteResFpuPair<WriteFSqrt64X,  [ZnFPU3], 20, [8]>;
-defm : ZnWriteResFpuPair<WriteFSqrt64Y,  [ZnFPU3], 20, [16], 1>;
+defm : ZnWriteResFpuPair<WriteFSqrt64Y,  [ZnFPU3], 20, [16], 2>;
 defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
 defm : ZnWriteResFpuPair<WriteFSqrt80,   [ZnFPU3], 20, [20]>;
-defm : ZnWriteResFpuPair<WriteFShuffle256, [ZnFPU], 2>;
-defm : ZnWriteResFpuPair<WriteFVarShuffle256, [ZnFPU], 2>;
+defm : ZnWriteResFpuPair<WriteFShuffle256, [ZnFPU], 2, [2], 2>;
+defm : ZnWriteResFpuPair<WriteFVarShuffle256, [ZnFPU], 2, [2], 2>;
 
 // Vector integer operations which uses FPU units
 defm : X86WriteRes<WriteVecLoad,         [ZnAGU], 8, [1], 1>;
@@ -440,9 +440,9 @@ defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
 defm : ZnWriteResFpuPair<WriteBlend,      [ZnFPU01], 1>;
 defm : ZnWriteResFpuPair<WriteBlendY,     [ZnFPU01], 1, [2], 2>;
 defm : X86WriteResPairUnsupported<WriteBlendZ>;
-defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU],   2>;
+defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU],   2, [2], 2>;
 defm : ZnWriteResFpuPair<WriteVPMOV256,   [ZnFPU12],  1, [4], 3>;
-defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU],   2>;
+defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU],   2, [2], 2>;
 defm : ZnWriteResFpuPair<WritePSADBW,     [ZnFPU0],  3>;
 defm : ZnWriteResFpuPair<WritePSADBWX,    [ZnFPU0],  3>;
 defm : ZnWriteResFpuPair<WritePSADBWY,    [ZnFPU0],  3, [2], 2>;

diff  --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
index 421cbbf993e1..544f0710c238 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
@@ -1660,12 +1660,12 @@ vzeroupper
 # CHECK-NEXT:  2      8     1.00    *                   vshufps	$1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      20    8.00                        vsqrtpd	%xmm0, %xmm2
 # CHECK-NEXT:  1      27    8.00    *                   vsqrtpd	(%rax), %xmm2
-# CHECK-NEXT:  1      20    16.00                       vsqrtpd	%ymm0, %ymm2
-# CHECK-NEXT:  1      27    16.00   *                   vsqrtpd	(%rax), %ymm2
+# CHECK-NEXT:  2      20    16.00                       vsqrtpd	%ymm0, %ymm2
+# CHECK-NEXT:  2      27    16.00   *                   vsqrtpd	(%rax), %ymm2
 # CHECK-NEXT:  1      14    5.00                        vsqrtps	%xmm0, %xmm2
 # CHECK-NEXT:  1      21    5.00    *                   vsqrtps	(%rax), %xmm2
-# CHECK-NEXT:  1      14    10.00                       vsqrtps	%ymm0, %ymm2
-# CHECK-NEXT:  1      21    10.00   *                   vsqrtps	(%rax), %ymm2
+# CHECK-NEXT:  2      14    10.00                       vsqrtps	%ymm0, %ymm2
+# CHECK-NEXT:  2      21    10.00   *                   vsqrtps	(%rax), %ymm2
 # CHECK-NEXT:  1      20    8.00                        vsqrtsd	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      27    8.00    *                   vsqrtsd	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      14    5.00                        vsqrtss	%xmm0, %xmm1, %xmm2

diff  --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s
index 179e6a4921f5..9f4b4b865e7a 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s
@@ -461,9 +461,9 @@ vpxor           (%rax), %ymm1, %ymm2
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
 # CHECK-NEXT:  1      8     0.50    *                   vbroadcasti128	(%rax), %ymm0
-# CHECK-NEXT:  1      2     0.25                        vbroadcastsd	%xmm0, %ymm0
-# CHECK-NEXT:  1      2     0.25                        vbroadcastss	%xmm0, %ymm0
-# CHECK-NEXT:  1      2     0.25                        vextracti128	$1, %ymm0, %xmm2
+# CHECK-NEXT:  2      2     0.50                        vbroadcastsd	%xmm0, %ymm0
+# CHECK-NEXT:  2      2     0.50                        vbroadcastss	%xmm0, %ymm0
+# CHECK-NEXT:  2      2     0.50                        vextracti128	$1, %ymm0, %xmm2
 # CHECK-NEXT:  1      1     0.50           *            vextracti128	$1, %ymm0, (%rax)
 # CHECK-NEXT:  1      100   0.25    *                   vgatherdpd	%xmm0, (%rax,%xmm1,2), %xmm2
 # CHECK-NEXT:  1      100   0.25    *                   vgatherdpd	%ymm0, (%rax,%xmm1,2), %ymm2
@@ -473,8 +473,8 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      100   0.25    *                   vgatherqpd	%ymm0, (%rax,%ymm1,2), %ymm2
 # CHECK-NEXT:  1      100   0.25    *                   vgatherqps	%xmm0, (%rax,%xmm1,2), %xmm2
 # CHECK-NEXT:  1      100   0.25    *                   vgatherqps	%xmm0, (%rax,%ymm1,2), %xmm2
-# CHECK-NEXT:  1      2     0.25                        vinserti128	$1, %xmm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      9     0.50    *                   vinserti128	$1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      2     0.50                        vinserti128	$1, %xmm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      9     0.50    *                   vinserti128	$1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      8     0.50    *                   vmovntdqa	(%rax), %ymm0
 # CHECK-NEXT:  1      100   0.25                        vmpsadbw	$1, %ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  1      100   0.25    *                   vmpsadbw	$1, (%rax), %ymm1, %ymm2
@@ -528,19 +528,19 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      9     0.50    *                   vpblendw	$11, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      1     0.25                        vpbroadcastb	%xmm0, %xmm0
 # CHECK-NEXT:  2      8     1.00    *                   vpbroadcastb	(%rax), %xmm0
-# CHECK-NEXT:  1      2     0.25                        vpbroadcastb	%xmm0, %ymm0
+# CHECK-NEXT:  2      2     0.50                        vpbroadcastb	%xmm0, %ymm0
 # CHECK-NEXT:  2      8     2.00    *                   vpbroadcastb	(%rax), %ymm0
 # CHECK-NEXT:  1      1     0.25                        vpbroadcastd	%xmm0, %xmm0
 # CHECK-NEXT:  1      8     0.50    *                   vpbroadcastd	(%rax), %xmm0
-# CHECK-NEXT:  1      2     0.25                        vpbroadcastd	%xmm0, %ymm0
+# CHECK-NEXT:  2      2     0.50                        vpbroadcastd	%xmm0, %ymm0
 # CHECK-NEXT:  1      8     0.50    *                   vpbroadcastd	(%rax), %ymm0
 # CHECK-NEXT:  1      1     0.25                        vpbroadcastq	%xmm0, %xmm0
 # CHECK-NEXT:  1      8     0.50    *                   vpbroadcastq	(%rax), %xmm0
-# CHECK-NEXT:  1      2     0.25                        vpbroadcastq	%xmm0, %ymm0
+# CHECK-NEXT:  2      2     0.50                        vpbroadcastq	%xmm0, %ymm0
 # CHECK-NEXT:  1      8     0.50    *                   vpbroadcastq	(%rax), %ymm0
 # CHECK-NEXT:  1      1     0.25                        vpbroadcastw	%xmm0, %xmm0
 # CHECK-NEXT:  2      8     1.00    *                   vpbroadcastw	(%rax), %xmm0
-# CHECK-NEXT:  1      2     0.25                        vpbroadcastw	%xmm0, %ymm0
+# CHECK-NEXT:  2      2     0.50                        vpbroadcastw	%xmm0, %ymm0
 # CHECK-NEXT:  2      8     2.00    *                   vpbroadcastw	(%rax), %ymm0
 # CHECK-NEXT:  2      1     0.50                        vpcmpeqb	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      8     0.50    *                   vpcmpeqb	(%rax), %ymm1, %ymm2
@@ -558,16 +558,16 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      8     1.00    *                   vpcmpgtq	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      1     0.50                        vpcmpgtw	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      8     0.50    *                   vpcmpgtw	(%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      2     0.25                        vperm2i128	$1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      9     0.50    *                   vperm2i128	$1, (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      2     0.25                        vpermd	%ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      9     0.50    *                   vpermd	(%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      2     0.25                        vpermpd	$1, %ymm0, %ymm2
-# CHECK-NEXT:  1      9     0.50    *                   vpermpd	$1, (%rax), %ymm2
-# CHECK-NEXT:  1      2     0.25                        vpermps	%ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  1      9     0.50    *                   vpermps	(%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      2     0.25                        vpermq	$1, %ymm0, %ymm2
-# CHECK-NEXT:  1      9     0.50    *                   vpermq	$1, (%rax), %ymm2
+# CHECK-NEXT:  2      2     0.50                        vperm2i128	$1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      9     0.50    *                   vperm2i128	$1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      2     0.50                        vpermd	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      9     0.50    *                   vpermd	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      2     0.50                        vpermpd	$1, %ymm0, %ymm2
+# CHECK-NEXT:  2      9     0.50    *                   vpermpd	$1, (%rax), %ymm2
+# CHECK-NEXT:  2      2     0.50                        vpermps	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  2      9     0.50    *                   vpermps	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  2      2     0.50                        vpermq	$1, %ymm0, %ymm2
+# CHECK-NEXT:  2      9     0.50    *                   vpermq	$1, (%rax), %ymm2
 # CHECK-NEXT:  1      100   0.25    *                   vpgatherdd	%xmm0, (%rax,%xmm1,2), %xmm2
 # CHECK-NEXT:  1      100   0.25    *                   vpgatherdd	%ymm0, (%rax,%ymm1,2), %ymm2
 # CHECK-NEXT:  1      100   0.25    *                   vpgatherdq	%xmm0, (%rax,%xmm1,2), %xmm2
@@ -778,14 +778,14 @@ vpxor           (%rax), %ymm1, %ymm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT: 67.00  67.00   -      -      -      -      -     128.17 192.67 172.00 74.17   -
+# CHECK-NEXT: 67.00  67.00   -      -      -      -      -     132.92 197.42 176.75 78.92   -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vbroadcasti128	(%rax), %ymm0
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vbroadcastsd	%xmm0, %ymm0
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vbroadcastss	%xmm0, %ymm0
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vextracti128	$1, %ymm0, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vbroadcastsd	%xmm0, %ymm0
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vbroadcastss	%xmm0, %ymm0
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vextracti128	$1, %ymm0, %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vextracti128	$1, %ymm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vgatherdpd	%xmm0, (%rax,%xmm1,2), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vgatherdpd	%ymm0, (%rax,%xmm1,2), %ymm2
@@ -795,8 +795,8 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vgatherqpd	%ymm0, (%rax,%ymm1,2), %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vgatherqps	%xmm0, (%rax,%xmm1,2), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vgatherqps	%xmm0, (%rax,%ymm1,2), %xmm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vinserti128	$1, %xmm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vinserti128	$1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vinserti128	$1, %xmm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vinserti128	$1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -      -      -      -     vmovntdqa	(%rax), %ymm0
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vmpsadbw	$1, %ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vmpsadbw	$1, (%rax), %ymm1, %ymm2
@@ -850,19 +850,19 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.33   0.33    -     0.33    -     vpblendw	$11, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpbroadcastb	%xmm0, %xmm0
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     1.00   1.00    -      -     vpbroadcastb	(%rax), %xmm0
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpbroadcastb	%xmm0, %ymm0
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpbroadcastb	%xmm0, %ymm0
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     2.00    -      -      -     vpbroadcastb	(%rax), %ymm0
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpbroadcastd	%xmm0, %xmm0
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpbroadcastd	(%rax), %xmm0
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpbroadcastd	%xmm0, %ymm0
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpbroadcastd	%xmm0, %ymm0
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpbroadcastd	(%rax), %ymm0
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpbroadcastq	%xmm0, %xmm0
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpbroadcastq	(%rax), %xmm0
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpbroadcastq	%xmm0, %ymm0
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpbroadcastq	%xmm0, %ymm0
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpbroadcastq	(%rax), %ymm0
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpbroadcastw	%xmm0, %xmm0
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     1.00   1.00    -      -     vpbroadcastw	(%rax), %xmm0
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpbroadcastw	%xmm0, %ymm0
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpbroadcastw	%xmm0, %ymm0
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     2.00    -      -      -     vpbroadcastw	(%rax), %ymm0
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpcmpeqb	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpcmpeqb	(%rax), %ymm1, %ymm2
@@ -880,16 +880,16 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     1.00    -      -     1.00    -     vpcmpgtq	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpcmpgtw	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpcmpgtw	(%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vperm2i128	$1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vperm2i128	$1, (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpermd	%ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpermd	(%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpermpd	$1, %ymm0, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpermpd	$1, (%rax), %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpermps	%ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpermps	(%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpermq	$1, %ymm0, %ymm2
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpermq	$1, (%rax), %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vperm2i128	$1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vperm2i128	$1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpermd	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpermd	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpermpd	$1, %ymm0, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpermpd	$1, (%rax), %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpermps	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpermps	(%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpermq	$1, %ymm0, %ymm2
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   0.50   0.50    -     vpermq	$1, (%rax), %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vpgatherdd	%xmm0, (%rax,%xmm1,2), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vpgatherdd	%ymm0, (%rax,%ymm1,2), %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -     vpgatherdq	%xmm0, (%rax,%xmm1,2), %xmm2


        


More information about the llvm-commits mailing list