[llvm] 2ce1698 - [X86] Fix perf bug in `permilps` -> `shufd` in X86FixupInstTuning.
Noah Goldstein via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 9 22:17:33 PDT 2023
Author: Noah Goldstein
Date: 2023-04-10T00:16:54-05:00
New Revision: 2ce1698a343c599910bceed399ca7020816b230e
URL: https://github.com/llvm/llvm-project/commit/2ce1698a343c599910bceed399ca7020816b230e
DIFF: https://github.com/llvm/llvm-project/commit/2ce1698a343c599910bceed399ca7020816b230e.diff
LOG: [X86] Fix perf bug in `permilps` -> `shufd` in X86FixupInstTuning.
We shouldn't do the transformation if either the target has a bypass
delay OR the new opcode has worse performance. The previous code
incorrectly used AND.
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D147727
Added:
Modified:
llvm/lib/Target/X86/X86FixupInstTuning.cpp
llvm/test/CodeGen/X86/tuning-shuffle-permilps-avx512.ll
llvm/test/CodeGen/X86/tuning-shuffle-permilps.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86FixupInstTuning.cpp b/llvm/lib/Target/X86/X86FixupInstTuning.cpp
index 4afd6007e872..599d7499125a 100644
--- a/llvm/lib/Target/X86/X86FixupInstTuning.cpp
+++ b/llvm/lib/Target/X86/X86FixupInstTuning.cpp
@@ -147,7 +147,7 @@ bool X86FixupInstTuningPass::processInstruction(
auto ProcessVPERMILPSmi = [&](unsigned NewOpc) -> bool {
// TODO: Might be work adding bypass delay if -Os/-Oz is enabled as
// `vpshufd` saves a byte of code size.
- if (!ST->hasNoDomainDelayShuffle() &&
+ if (!ST->hasNoDomainDelayShuffle() ||
!NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
return false;
MI.setDesc(TII->get(NewOpc));
diff --git a/llvm/test/CodeGen/X86/tuning-shuffle-permilps-avx512.ll b/llvm/test/CodeGen/X86/tuning-shuffle-permilps-avx512.ll
index 55cd0f4d1345..5d031f6017c7 100644
--- a/llvm/test/CodeGen/X86/tuning-shuffle-permilps-avx512.ll
+++ b/llvm/test/CodeGen/X86/tuning-shuffle-permilps-avx512.ll
@@ -108,40 +108,25 @@ define <4 x float> @transform_VPERMILPSrrk(<4 x float> %a, <4 x float> %b, i4 %m
}
define <16 x float> @transform_VPERMILPSZrm(ptr %ap) nounwind {
-; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VPERMILPSZrm:
-; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
-; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpshufd {{.*#+}} zmm0 = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
-; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
-;
-; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VPERMILPSZrm:
-; CHECK-ICX-BYPASS-DELAY: # %bb.0:
-; CHECK-ICX-BYPASS-DELAY-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
-; CHECK-ICX-BYPASS-DELAY-NEXT: retq
-;
-; CHECK-V4-LABEL: transform_VPERMILPSZrm:
-; CHECK-V4: # %bb.0:
-; CHECK-V4-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
-; CHECK-V4-NEXT: retq
-;
-; CHECK-AVX512-LABEL: transform_VPERMILPSZrm:
-; CHECK-AVX512: # %bb.0:
-; CHECK-AVX512-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
-; CHECK-AVX512-NEXT: retq
-;
-; CHECK-ZNVER4-LABEL: transform_VPERMILPSZrm:
-; CHECK-ZNVER4: # %bb.0:
-; CHECK-ZNVER4-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
-; CHECK-ZNVER4-NEXT: retq
+; CHECK-LABEL: transform_VPERMILPSZrm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; CHECK-NEXT: retq
%a = load <16 x float>, ptr %ap
%shufp = shufflevector <16 x float> %a, <16 x float> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
ret <16 x float> %shufp
}
define <8 x float> @transform_VPERMILPSYrm(ptr %ap) nounwind {
-; CHECK-ICX-LABEL: transform_VPERMILPSYrm:
-; CHECK-ICX: # %bb.0:
-; CHECK-ICX-NEXT: vpshufd {{.*#+}} ymm0 = mem[3,2,1,0,7,6,5,4]
-; CHECK-ICX-NEXT: retq
+; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VPERMILPSYrm:
+; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
+; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpshufd {{.*#+}} ymm0 = mem[3,2,1,0,7,6,5,4]
+; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
+;
+; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VPERMILPSYrm:
+; CHECK-ICX-BYPASS-DELAY: # %bb.0:
+; CHECK-ICX-BYPASS-DELAY-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,1,0,7,6,5,4]
+; CHECK-ICX-BYPASS-DELAY-NEXT: retq
;
; CHECK-V4-LABEL: transform_VPERMILPSYrm:
; CHECK-V4: # %bb.0:
@@ -163,10 +148,15 @@ define <8 x float> @transform_VPERMILPSYrm(ptr %ap) nounwind {
}
define <4 x float> @transform_VPERMILPSrm(ptr %ap) nounwind {
-; CHECK-ICX-LABEL: transform_VPERMILPSrm:
-; CHECK-ICX: # %bb.0:
-; CHECK-ICX-NEXT: vpshufd {{.*#+}} xmm0 = mem[3,2,1,0]
-; CHECK-ICX-NEXT: retq
+; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VPERMILPSrm:
+; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
+; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpshufd {{.*#+}} xmm0 = mem[3,2,1,0]
+; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
+;
+; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VPERMILPSrm:
+; CHECK-ICX-BYPASS-DELAY: # %bb.0:
+; CHECK-ICX-BYPASS-DELAY-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
+; CHECK-ICX-BYPASS-DELAY-NEXT: retq
;
; CHECK-V4-LABEL: transform_VPERMILPSrm:
; CHECK-V4: # %bb.0:
@@ -188,35 +178,11 @@ define <4 x float> @transform_VPERMILPSrm(ptr %ap) nounwind {
}
define <16 x float> @transform_VPERMILPSZrmkz(ptr %ap, i16 %mask_int) nounwind {
-; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VPERMILPSZrmkz:
-; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
-; CHECK-ICX-NO-BYPASS-DELAY-NEXT: kmovd %esi, %k1
-; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
-; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
-;
-; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VPERMILPSZrmkz:
-; CHECK-ICX-BYPASS-DELAY: # %bb.0:
-; CHECK-ICX-BYPASS-DELAY-NEXT: kmovd %esi, %k1
-; CHECK-ICX-BYPASS-DELAY-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
-; CHECK-ICX-BYPASS-DELAY-NEXT: retq
-;
-; CHECK-V4-LABEL: transform_VPERMILPSZrmkz:
-; CHECK-V4: # %bb.0:
-; CHECK-V4-NEXT: kmovd %esi, %k1
-; CHECK-V4-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
-; CHECK-V4-NEXT: retq
-;
-; CHECK-AVX512-LABEL: transform_VPERMILPSZrmkz:
-; CHECK-AVX512: # %bb.0:
-; CHECK-AVX512-NEXT: kmovd %esi, %k1
-; CHECK-AVX512-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
-; CHECK-AVX512-NEXT: retq
-;
-; CHECK-ZNVER4-LABEL: transform_VPERMILPSZrmkz:
-; CHECK-ZNVER4: # %bb.0:
-; CHECK-ZNVER4-NEXT: kmovd %esi, %k1
-; CHECK-ZNVER4-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
-; CHECK-ZNVER4-NEXT: retq
+; CHECK-LABEL: transform_VPERMILPSZrmkz:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; CHECK-NEXT: retq
%mask = bitcast i16 %mask_int to <16 x i1>
%a = load <16 x float>, ptr %ap
%shufp = shufflevector <16 x float> %a, <16 x float> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
@@ -225,11 +191,17 @@ define <16 x float> @transform_VPERMILPSZrmkz(ptr %ap, i16 %mask_int) nounwind {
}
define <8 x float> @transform_VPERMILPSYrmkz(ptr %ap, i8 %mask_int) nounwind {
-; CHECK-ICX-LABEL: transform_VPERMILPSYrmkz:
-; CHECK-ICX: # %bb.0:
-; CHECK-ICX-NEXT: kmovd %esi, %k1
-; CHECK-ICX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,1,0,7,6,5,4]
-; CHECK-ICX-NEXT: retq
+; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VPERMILPSYrmkz:
+; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
+; CHECK-ICX-NO-BYPASS-DELAY-NEXT: kmovd %esi, %k1
+; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,1,0,7,6,5,4]
+; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
+;
+; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VPERMILPSYrmkz:
+; CHECK-ICX-BYPASS-DELAY: # %bb.0:
+; CHECK-ICX-BYPASS-DELAY-NEXT: kmovd %esi, %k1
+; CHECK-ICX-BYPASS-DELAY-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[3,2,1,0,7,6,5,4]
+; CHECK-ICX-BYPASS-DELAY-NEXT: retq
;
; CHECK-V4-LABEL: transform_VPERMILPSYrmkz:
; CHECK-V4: # %bb.0:
@@ -256,11 +228,17 @@ define <8 x float> @transform_VPERMILPSYrmkz(ptr %ap, i8 %mask_int) nounwind {
}
define <4 x float> @transform_VPERMILPSrmkz(ptr %ap, i4 %mask_int) nounwind {
-; CHECK-ICX-LABEL: transform_VPERMILPSrmkz:
-; CHECK-ICX: # %bb.0:
-; CHECK-ICX-NEXT: kmovd %esi, %k1
-; CHECK-ICX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[3,2,1,0]
-; CHECK-ICX-NEXT: retq
+; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VPERMILPSrmkz:
+; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
+; CHECK-ICX-NO-BYPASS-DELAY-NEXT: kmovd %esi, %k1
+; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[3,2,1,0]
+; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
+;
+; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VPERMILPSrmkz:
+; CHECK-ICX-BYPASS-DELAY: # %bb.0:
+; CHECK-ICX-BYPASS-DELAY-NEXT: kmovd %esi, %k1
+; CHECK-ICX-BYPASS-DELAY-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[3,2,1,0]
+; CHECK-ICX-BYPASS-DELAY-NEXT: retq
;
; CHECK-V4-LABEL: transform_VPERMILPSrmkz:
; CHECK-V4: # %bb.0:
@@ -287,35 +265,11 @@ define <4 x float> @transform_VPERMILPSrmkz(ptr %ap, i4 %mask_int) nounwind {
}
define <16 x float> @transform_VPERMILPSZrmk(ptr %ap, <16 x float> %b, i16 %mask_int) nounwind {
-; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VPERMILPSZrmk:
-; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
-; CHECK-ICX-NO-BYPASS-DELAY-NEXT: kmovd %esi, %k1
-; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
-; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
-;
-; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VPERMILPSZrmk:
-; CHECK-ICX-BYPASS-DELAY: # %bb.0:
-; CHECK-ICX-BYPASS-DELAY-NEXT: kmovd %esi, %k1
-; CHECK-ICX-BYPASS-DELAY-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
-; CHECK-ICX-BYPASS-DELAY-NEXT: retq
-;
-; CHECK-V4-LABEL: transform_VPERMILPSZrmk:
-; CHECK-V4: # %bb.0:
-; CHECK-V4-NEXT: kmovd %esi, %k1
-; CHECK-V4-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
-; CHECK-V4-NEXT: retq
-;
-; CHECK-AVX512-LABEL: transform_VPERMILPSZrmk:
-; CHECK-AVX512: # %bb.0:
-; CHECK-AVX512-NEXT: kmovd %esi, %k1
-; CHECK-AVX512-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
-; CHECK-AVX512-NEXT: retq
-;
-; CHECK-ZNVER4-LABEL: transform_VPERMILPSZrmk:
-; CHECK-ZNVER4: # %bb.0:
-; CHECK-ZNVER4-NEXT: kmovd %esi, %k1
-; CHECK-ZNVER4-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
-; CHECK-ZNVER4-NEXT: retq
+; CHECK-LABEL: transform_VPERMILPSZrmk:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; CHECK-NEXT: retq
%mask = bitcast i16 %mask_int to <16 x i1>
%a = load <16 x float>, ptr %ap
%shufp = shufflevector <16 x float> %a, <16 x float> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
@@ -324,11 +278,17 @@ define <16 x float> @transform_VPERMILPSZrmk(ptr %ap, <16 x float> %b, i16 %mask
}
define <8 x float> @transform_VPERMILPSYrmk(ptr %ap, <8 x float> %b, i8 %mask_int) nounwind {
-; CHECK-ICX-LABEL: transform_VPERMILPSYrmk:
-; CHECK-ICX: # %bb.0:
-; CHECK-ICX-NEXT: kmovd %esi, %k1
-; CHECK-ICX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,1,0,7,6,5,4]
-; CHECK-ICX-NEXT: retq
+; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VPERMILPSYrmk:
+; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
+; CHECK-ICX-NO-BYPASS-DELAY-NEXT: kmovd %esi, %k1
+; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,1,0,7,6,5,4]
+; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
+;
+; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VPERMILPSYrmk:
+; CHECK-ICX-BYPASS-DELAY: # %bb.0:
+; CHECK-ICX-BYPASS-DELAY-NEXT: kmovd %esi, %k1
+; CHECK-ICX-BYPASS-DELAY-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[3,2,1,0,7,6,5,4]
+; CHECK-ICX-BYPASS-DELAY-NEXT: retq
;
; CHECK-V4-LABEL: transform_VPERMILPSYrmk:
; CHECK-V4: # %bb.0:
@@ -355,11 +315,17 @@ define <8 x float> @transform_VPERMILPSYrmk(ptr %ap, <8 x float> %b, i8 %mask_in
}
define <4 x float> @transform_VPERMILPSrmk(ptr %ap, <4 x float> %b, i4 %mask_int) nounwind {
-; CHECK-ICX-LABEL: transform_VPERMILPSrmk:
-; CHECK-ICX: # %bb.0:
-; CHECK-ICX-NEXT: kmovd %esi, %k1
-; CHECK-ICX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[3,2,1,0]
-; CHECK-ICX-NEXT: retq
+; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VPERMILPSrmk:
+; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
+; CHECK-ICX-NO-BYPASS-DELAY-NEXT: kmovd %esi, %k1
+; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[3,2,1,0]
+; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
+;
+; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VPERMILPSrmk:
+; CHECK-ICX-BYPASS-DELAY: # %bb.0:
+; CHECK-ICX-BYPASS-DELAY-NEXT: kmovd %esi, %k1
+; CHECK-ICX-BYPASS-DELAY-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[3,2,1,0]
+; CHECK-ICX-BYPASS-DELAY-NEXT: retq
;
; CHECK-V4-LABEL: transform_VPERMILPSrmk:
; CHECK-V4: # %bb.0:
@@ -384,3 +350,5 @@ define <4 x float> @transform_VPERMILPSrmk(ptr %ap, <4 x float> %b, i4 %mask_int
%res = select <4 x i1> %mask, <4 x float> %shufp, <4 x float> %b
ret <4 x float> %res
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-ICX: {{.*}}
diff --git a/llvm/test/CodeGen/X86/tuning-shuffle-permilps.ll b/llvm/test/CodeGen/X86/tuning-shuffle-permilps.ll
index 2f0d7d1517e8..01f8df684e74 100644
--- a/llvm/test/CodeGen/X86/tuning-shuffle-permilps.ll
+++ b/llvm/test/CodeGen/X86/tuning-shuffle-permilps.ll
@@ -39,7 +39,7 @@ define <8 x float> @transform_VPERMILPSYrm(ptr %ap) nounwind {
;
; CHECK-AVX2-LABEL: transform_VPERMILPSYrm:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpshufd {{.*#+}} ymm0 = mem[3,2,1,0,7,6,5,4]
+; CHECK-AVX2-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,1,0,7,6,5,4]
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX2-DELAY-LABEL: transform_VPERMILPSYrm:
@@ -47,10 +47,15 @@ define <8 x float> @transform_VPERMILPSYrm(ptr %ap) nounwind {
; CHECK-AVX2-DELAY-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,1,0,7,6,5,4]
; CHECK-AVX2-DELAY-NEXT: retq
;
-; CHECK-ICX-LABEL: transform_VPERMILPSYrm:
-; CHECK-ICX: # %bb.0:
-; CHECK-ICX-NEXT: vpshufd {{.*#+}} ymm0 = mem[3,2,1,0,7,6,5,4]
-; CHECK-ICX-NEXT: retq
+; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VPERMILPSYrm:
+; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
+; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpshufd {{.*#+}} ymm0 = mem[3,2,1,0,7,6,5,4]
+; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
+;
+; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VPERMILPSYrm:
+; CHECK-ICX-BYPASS-DELAY: # %bb.0:
+; CHECK-ICX-BYPASS-DELAY-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,1,0,7,6,5,4]
+; CHECK-ICX-BYPASS-DELAY-NEXT: retq
;
; CHECK-SNB-LABEL: transform_VPERMILPSYrm:
; CHECK-SNB: # %bb.0:
@@ -64,7 +69,7 @@ define <8 x float> @transform_VPERMILPSYrm(ptr %ap) nounwind {
define <4 x float> @transform_VPERMILPSrm(ptr %ap) nounwind {
; CHECK-AVX1-LABEL: transform_VPERMILPSrm:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = mem[3,2,1,0]
+; CHECK-AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX1-DELAY-LABEL: transform_VPERMILPSrm:
@@ -74,7 +79,7 @@ define <4 x float> @transform_VPERMILPSrm(ptr %ap) nounwind {
;
; CHECK-AVX2-LABEL: transform_VPERMILPSrm:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm0 = mem[3,2,1,0]
+; CHECK-AVX2-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX2-DELAY-LABEL: transform_VPERMILPSrm:
@@ -82,21 +87,28 @@ define <4 x float> @transform_VPERMILPSrm(ptr %ap) nounwind {
; CHECK-AVX2-DELAY-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
; CHECK-AVX2-DELAY-NEXT: retq
;
-; CHECK-ICX-LABEL: transform_VPERMILPSrm:
-; CHECK-ICX: # %bb.0:
-; CHECK-ICX-NEXT: vpshufd {{.*#+}} xmm0 = mem[3,2,1,0]
-; CHECK-ICX-NEXT: retq
+; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VPERMILPSrm:
+; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
+; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpshufd {{.*#+}} xmm0 = mem[3,2,1,0]
+; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
;
-; CHECK-SNB-LABEL: transform_VPERMILPSrm:
-; CHECK-SNB: # %bb.0:
-; CHECK-SNB-NEXT: vpshufd {{.*#+}} xmm0 = mem[3,2,1,0]
-; CHECK-SNB-NEXT: retq
+; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VPERMILPSrm:
+; CHECK-ICX-BYPASS-DELAY: # %bb.0:
+; CHECK-ICX-BYPASS-DELAY-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
+; CHECK-ICX-BYPASS-DELAY-NEXT: retq
+;
+; CHECK-SNB-NO-BYPASS-DELAY-LABEL: transform_VPERMILPSrm:
+; CHECK-SNB-NO-BYPASS-DELAY: # %bb.0:
+; CHECK-SNB-NO-BYPASS-DELAY-NEXT: vpshufd {{.*#+}} xmm0 = mem[3,2,1,0]
+; CHECK-SNB-NO-BYPASS-DELAY-NEXT: retq
+;
+; CHECK-SNB-BYPASS-DELAY-LABEL: transform_VPERMILPSrm:
+; CHECK-SNB-BYPASS-DELAY: # %bb.0:
+; CHECK-SNB-BYPASS-DELAY-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
+; CHECK-SNB-BYPASS-DELAY-NEXT: retq
%a = load <4 x float>, ptr %ap
%shufp = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %shufp
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-ICX-BYPASS-DELAY: {{.*}}
-; CHECK-ICX-NO-BYPASS-DELAY: {{.*}}
-; CHECK-SNB-BYPASS-DELAY: {{.*}}
-; CHECK-SNB-NO-BYPASS-DELAY: {{.*}}
+; CHECK-ICX: {{.*}}
More information about the llvm-commits mailing list