[llvm] 9aa7041 - [X86] Canonicalize VPERMV3 to VPERMV if both sources are the same.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 9 02:29:15 PST 2024
Author: Simon Pilgrim
Date: 2024-12-09T10:27:44Z
New Revision: 9aa70419b8834b1468f19f3515d91245133cb5e7
URL: https://github.com/llvm/llvm-project/commit/9aa70419b8834b1468f19f3515d91245133cb5e7
DIFF: https://github.com/llvm/llvm-project/commit/9aa70419b8834b1468f19f3515d91245133cb5e7.diff
LOG: [X86] Canonicalize VPERMV3 to VPERMV if both sources are the same.
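Background for readers, not part of the committed message: VPERMV3 (vpermi2*/vpermt2*) is the two-source variable permute; its mask indices run over 0..2N-1 and select from the concatenation of both sources. VPERMV (vperm*) is the single-source form with indices 0..N-1. When both sources are the same vector, index M and index M+N address the same element, so every mask entry can be reduced modulo N and the node rewritten as the cheaper single-source VPERMV, dropping one register dependency. A minimal standalone C++ sketch of that reduction follows, using the 8 x i64 mask from the stride-6 test changed below; it is an illustrative model, not LLVM API:

// With identical sources, lane M and lane M+N select the same element,
// so each index folds to M & (N - 1) for power-of-two N.
#include <cstdio>
#include <vector>

int main() {
  // VPERMV3 mask over two copies of the same 8 x i64 source, taken from
  // vector-interleaved-store-i8-stride-6.ll below.
  std::vector<int> Mask = {4, 6, 4, 6, 8, 10, 9, 11};
  const int NumElts = 8; // elements per source vector; a power of two
  for (int &M : Mask)
    if (M >= 0) // negative entries would be sentinels; leave them alone
      M &= NumElts - 1;
  for (int M : Mask)
    std::printf("%d ", M); // prints: 4 6 4 6 0 2 1 3
  std::printf("\n");
}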
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-6.ll
llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5c6b22896edd0f..59b730d52a4b3e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -42301,11 +42301,11 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
return SDValue();
}
case X86ISD::VPERMV3: {
- // Combine VPERMV3 to widened VPERMV if the two source operands are split
- // from the same vector.
SDValue V1 = peekThroughBitcasts(N.getOperand(0));
SDValue V2 = peekThroughBitcasts(N.getOperand(2));
MVT SVT = V1.getSimpleValueType();
+ // Combine VPERMV3 to widened VPERMV if the two source operands are split
+ // from the same vector.
if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
V1.getConstantOperandVal(1) == 0 &&
V2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
@@ -42326,14 +42326,25 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
}
SmallVector<SDValue, 2> Ops;
SmallVector<int, 32> Mask;
- if (isShuffleFoldableLoad(N.getOperand(0)) &&
- !isShuffleFoldableLoad(N.getOperand(2)) &&
- getTargetShuffleMask(N, /*AllowSentinelZero=*/false, Ops, Mask)) {
- ShuffleVectorSDNode::commuteMask(Mask);
- SDValue NewMask = getConstVector(
- Mask, N.getOperand(1).getSimpleValueType(), DAG, DL, /*IsMask=*/true);
- return DAG.getNode(X86ISD::VPERMV3, DL, VT, N.getOperand(2), NewMask,
- N.getOperand(0));
+ if (getTargetShuffleMask(N, /*AllowSentinelZero=*/false, Ops, Mask)) {
+ MVT MaskVT = N.getOperand(1).getSimpleValueType();
+ // Canonicalize to VPERMV if both sources are the same.
+ if (V1 == V2) {
+ for (int &M : Mask)
+ M = (M < 0 ? M : M & Mask.size() - 1);
+ SDValue NewMask = getConstVector(Mask, MaskVT, DAG, DL,
+ /*IsMask=*/true);
+ return DAG.getNode(X86ISD::VPERMV, DL, VT, NewMask, N.getOperand(0));
+ }
+ // Commute foldable source to the RHS.
+ if (isShuffleFoldableLoad(N.getOperand(0)) &&
+ !isShuffleFoldableLoad(N.getOperand(2))) {
+ ShuffleVectorSDNode::commuteMask(Mask);
+ SDValue NewMask =
+ getConstVector(Mask, MaskVT, DAG, DL, /*IsMask=*/true);
+ return DAG.getNode(X86ISD::VPERMV3, DL, VT, N.getOperand(2), NewMask,
+ N.getOperand(0));
+ }
}
return SDValue();
}
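Commentary on the second branch above: when exactly one of the two sources is a foldable load, the operands are swapped so the load lands in the RHS position, where the VPERMI2/VPERMT2 encodings accept a memory operand, and ShuffleVectorSDNode::commuteMask rewrites the indices to match. A rough standalone sketch of that index adjustment follows; the local commuteMask below is an illustrative stand-in for the LLVM helper, under the assumption that negative entries are sentinels:

#include <cstdio>
#include <vector>

// For a two-source mask of width N, remap entries pointing into the
// first source to the second and vice versa, so the sources themselves
// can be swapped without changing the shuffle's result.
static void commuteMask(std::vector<int> &Mask) {
  const int NumElts = static_cast<int>(Mask.size());
  for (int &M : Mask)
    if (M >= 0) // negative entries are sentinels and stay put
      M = (M < NumElts) ? M + NumElts : M - NumElts;
}

int main() {
  std::vector<int> Mask = {0, 5, 2, 7}; // lanes interleaved from V1, V2
  commuteMask(Mask);
  for (int M : Mask)
    std::printf("%d ", M); // prints: 4 1 6 3 (now relative to V2, V1)
  std::printf("\n");
}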
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-6.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-6.ll
index 218a492fb0e427..37919128ba9f7a 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-6.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-6.ll
@@ -1599,12 +1599,12 @@ define void @store_i8_stride6_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512BW-FCP-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0
; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm3
; AVX512BW-FCP-NEXT: vinserti32x4 $1, (%r9), %zmm2, %zmm2
-; AVX512BW-FCP-NEXT: vpmovsxbq {{.*#+}} zmm4 = [4,6,4,6,8,10,9,11]
-; AVX512BW-FCP-NEXT: vpermi2q %zmm3, %zmm3, %zmm4
+; AVX512BW-FCP-NEXT: vpmovsxbq {{.*#+}} zmm4 = [4,6,4,6,0,2,1,3]
+; AVX512BW-FCP-NEXT: vpermq %zmm3, %zmm4, %zmm4
; AVX512BW-FCP-NEXT: vpshufb {{.*#+}} zmm4 = zmm4[u,u,0,8,u,u,u,u,1,9,u,u,u,u,2,10,u,u,u,u,19,27,u,u,u,u,20,28,u,u,u,u,u,u,u,u,38,46,u,u,u,u,39,47,u,u,u,u,48,56,u,u,u,u,49,57,u,u,u,u,50,58,u,u]
-; AVX512BW-FCP-NEXT: vpmovsxbq {{.*#+}} zmm5 = [0,2,0,2,12,14,13,15]
-; AVX512BW-FCP-NEXT: vpermi2q %zmm3, %zmm3, %zmm5
-; AVX512BW-FCP-NEXT: vpshufb {{.*#+}} zmm3 = zmm5[0,8,u,u,u,u,1,9,u,u,u,u,2,10,u,u,u,u,19,27,u,u,u,u,20,28,u,u,u,u,21,29,37,45,u,u,u,u,38,46,u,u,u,u,39,47,u,u,u,u,48,56,u,u,u,u,49,57,u,u,u,u,50,58]
+; AVX512BW-FCP-NEXT: vpmovsxbq {{.*#+}} zmm5 = [0,2,0,2,4,6,5,7]
+; AVX512BW-FCP-NEXT: vpermq %zmm3, %zmm5, %zmm3
+; AVX512BW-FCP-NEXT: vpshufb {{.*#+}} zmm3 = zmm3[0,8,u,u,u,u,1,9,u,u,u,u,2,10,u,u,u,u,19,27,u,u,u,u,20,28,u,u,u,u,21,29,37,45,u,u,u,u,38,46,u,u,u,u,39,47,u,u,u,u,48,56,u,u,u,u,49,57,u,u,u,u,50,58]
; AVX512BW-FCP-NEXT: movl $1227105426, %ecx # imm = 0x49242492
; AVX512BW-FCP-NEXT: kmovd %ecx, %k1
; AVX512BW-FCP-NEXT: vmovdqu16 %zmm4, %zmm3 {%k1}
@@ -1685,12 +1685,12 @@ define void @store_i8_stride6_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec
; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0
; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm3
; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, (%r9), %zmm2, %zmm2
-; AVX512DQ-BW-FCP-NEXT: vpmovsxbq {{.*#+}} zmm4 = [4,6,4,6,8,10,9,11]
-; AVX512DQ-BW-FCP-NEXT: vpermi2q %zmm3, %zmm3, %zmm4
+; AVX512DQ-BW-FCP-NEXT: vpmovsxbq {{.*#+}} zmm4 = [4,6,4,6,0,2,1,3]
+; AVX512DQ-BW-FCP-NEXT: vpermq %zmm3, %zmm4, %zmm4
; AVX512DQ-BW-FCP-NEXT: vpshufb {{.*#+}} zmm4 = zmm4[u,u,0,8,u,u,u,u,1,9,u,u,u,u,2,10,u,u,u,u,19,27,u,u,u,u,20,28,u,u,u,u,u,u,u,u,38,46,u,u,u,u,39,47,u,u,u,u,48,56,u,u,u,u,49,57,u,u,u,u,50,58,u,u]
-; AVX512DQ-BW-FCP-NEXT: vpmovsxbq {{.*#+}} zmm5 = [0,2,0,2,12,14,13,15]
-; AVX512DQ-BW-FCP-NEXT: vpermi2q %zmm3, %zmm3, %zmm5
-; AVX512DQ-BW-FCP-NEXT: vpshufb {{.*#+}} zmm3 = zmm5[0,8,u,u,u,u,1,9,u,u,u,u,2,10,u,u,u,u,19,27,u,u,u,u,20,28,u,u,u,u,21,29,37,45,u,u,u,u,38,46,u,u,u,u,39,47,u,u,u,u,48,56,u,u,u,u,49,57,u,u,u,u,50,58]
+; AVX512DQ-BW-FCP-NEXT: vpmovsxbq {{.*#+}} zmm5 = [0,2,0,2,4,6,5,7]
+; AVX512DQ-BW-FCP-NEXT: vpermq %zmm3, %zmm5, %zmm3
+; AVX512DQ-BW-FCP-NEXT: vpshufb {{.*#+}} zmm3 = zmm3[0,8,u,u,u,u,1,9,u,u,u,u,2,10,u,u,u,u,19,27,u,u,u,u,20,28,u,u,u,u,21,29,37,45,u,u,u,u,38,46,u,u,u,u,39,47,u,u,u,u,48,56,u,u,u,u,49,57,u,u,u,u,50,58]
; AVX512DQ-BW-FCP-NEXT: movl $1227105426, %ecx # imm = 0x49242492
; AVX512DQ-BW-FCP-NEXT: kmovd %ecx, %k1
; AVX512DQ-BW-FCP-NEXT: vmovdqu16 %zmm4, %zmm3 {%k1}
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
index b4375cfb343b7b..376fe37ef1fa86 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
@@ -24,8 +24,7 @@ define <32 x i16> @combine_vpermt2var_32i16_identity_mask(<32 x i16> %x0, <32 x
; X86-NEXT: vpmovsxbw {{.*#+}} zmm1 = [31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpermt2w %zmm0, %zmm1, %zmm0 {%k1} {z}
-; X86-NEXT: vpmovsxbw {{.*#+}} zmm1 = [63,30,61,28,59,26,57,24,55,22,53,20,51,18,49,16,47,46,13,44,11,42,9,40,7,38,5,36,3,34,1,32]
-; X86-NEXT: vpermt2w %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X86-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_32i16_identity_mask:
@@ -33,8 +32,7 @@ define <32 x i16> @combine_vpermt2var_32i16_identity_mask(<32 x i16> %x0, <32 x
; X64-NEXT: vpmovsxbw {{.*#+}} zmm1 = [31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpermt2w %zmm0, %zmm1, %zmm0 {%k1} {z}
-; X64-NEXT: vpmovsxbw {{.*#+}} zmm1 = [63,30,61,28,59,26,57,24,55,22,53,20,51,18,49,16,47,46,13,44,11,42,9,40,7,38,5,36,3,34,1,32]
-; X64-NEXT: vpermt2w %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X64-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT: retq
%res0 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> <i16 31, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <32 x i16> %x0, <32 x i16> %x1, i32 %m)
%res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> <i16 63, i16 30, i16 61, i16 28, i16 59, i16 26, i16 57, i16 24, i16 55, i16 22, i16 53, i16 20, i16 51, i16 18, i16 49, i16 16, i16 47, i16 46, i16 13, i16 44, i16 11, i16 42, i16 9, i16 40, i16 7, i16 38, i16 5, i16 36, i16 3, i16 34, i16 1, i16 32>, <32 x i16> %res0, <32 x i16> %res0, i32 %m)
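A note on the identity_mask tests in this and the following three files: the second vpermt2var call passes %res0 as both data operands, which is exactly the V1 == V2 pattern the new combine targets. The payoff is bigger than just switching opcodes. Reducing the second mask modulo the source width (32 here) gives, element by element, 63 & 31 = 31, 30 & 31 = 30, 61 & 31 = 29, ..., 1 & 31 = 1, 32 & 31 = 0, i.e. [31,30,29,...,2,1,0], which is the reversal constant already loaded into zmm1 for the first permute, so the second constant load disappears entirely and a lone vpermw (resp. vpermpd/vpermq/vpermps/vpermd/vpermb in the later files) remains.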
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
index 388511ce0741f6..f0b70ae26b1f0a 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
@@ -19,8 +19,7 @@ define <16 x i16> @combine_vpermt2var_16i16_identity_mask(<16 x i16> %x0, <16 x
; X86-NEXT: vpmovsxbw {{.*#+}} ymm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpermt2w %ymm0, %ymm1, %ymm0 {%k1} {z}
-; X86-NEXT: vpmovsxbw {{.*#+}} ymm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
-; X86-NEXT: vpermt2w %ymm0, %ymm1, %ymm0 {%k1} {z}
+; X86-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_16i16_identity_mask:
@@ -28,8 +27,7 @@ define <16 x i16> @combine_vpermt2var_16i16_identity_mask(<16 x i16> %x0, <16 x
; X64-NEXT: vpmovsxbw {{.*#+}} ymm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpermt2w %ymm0, %ymm1, %ymm0 {%k1} {z}
-; X64-NEXT: vpmovsxbw {{.*#+}} ymm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
-; X64-NEXT: vpermt2w %ymm0, %ymm1, %ymm0 {%k1} {z}
+; X64-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT: retq
%res0 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> <i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <16 x i16> %x0, <16 x i16> %x1, i16 %m)
%res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> <i16 15, i16 30, i16 13, i16 28, i16 11, i16 26, i16 9, i16 24, i16 7, i16 22, i16 5, i16 20, i16 3, i16 18, i16 1, i16 16>, <16 x i16> %res0, <16 x i16> %res0, i16 %m)
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
index e87e810971e119..29806cd25fe3f4 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
@@ -155,8 +155,7 @@ define <8 x double> @combine_vpermt2var_8f64_identity_mask(<8 x double> %x0, <8
; X86-AVX512F-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-AVX512F-NEXT: kmovw %eax, %k1
; X86-AVX512F-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
-; X86-AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
-; X86-AVX512F-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X86-AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z}
; X86-AVX512F-NEXT: retl
;
; X86-AVX512BW-LABEL: combine_vpermt2var_8f64_identity_mask:
@@ -165,8 +164,7 @@ define <8 x double> @combine_vpermt2var_8f64_identity_mask(<8 x double> %x0, <8
; X86-AVX512BW-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-AVX512BW-NEXT: kmovd %eax, %k1
; X86-AVX512BW-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
-; X86-AVX512BW-NEXT: vmovapd {{.*#+}} zmm1 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
-; X86-AVX512BW-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X86-AVX512BW-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z}
; X86-AVX512BW-NEXT: retl
;
; X64-AVX512F-LABEL: combine_vpermt2var_8f64_identity_mask:
@@ -174,8 +172,7 @@ define <8 x double> @combine_vpermt2var_8f64_identity_mask(<8 x double> %x0, <8
; X64-AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [7,6,5,4,3,2,1,0]
; X64-AVX512F-NEXT: kmovw %edi, %k1
; X64-AVX512F-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
-; X64-AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [7,14,5,12,3,10,1,8]
-; X64-AVX512F-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X64-AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z}
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: combine_vpermt2var_8f64_identity_mask:
@@ -183,8 +180,7 @@ define <8 x double> @combine_vpermt2var_8f64_identity_mask(<8 x double> %x0, <8
; X64-AVX512BW-NEXT: vmovapd {{.*#+}} zmm1 = [7,6,5,4,3,2,1,0]
; X64-AVX512BW-NEXT: kmovd %edi, %k1
; X64-AVX512BW-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
-; X64-AVX512BW-NEXT: vmovapd {{.*#+}} zmm1 = [7,14,5,12,3,10,1,8]
-; X64-AVX512BW-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X64-AVX512BW-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z}
; X64-AVX512BW-NEXT: retq
%res0 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x double> %x0, <8 x double> %x1, i8 %m)
%res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>, <8 x double> %res0, <8 x double> %res0, i8 %m)
@@ -259,8 +255,7 @@ define <8 x i64> @combine_vpermt2var_8i64_identity_mask(<8 x i64> %x0, <8 x i64>
; X86-AVX512F-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-AVX512F-NEXT: kmovw %eax, %k1
; X86-AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
-; X86-AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm1 = [7,14,5,12,3,10,1,8]
-; X86-AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X86-AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z}
; X86-AVX512F-NEXT: retl
;
; X86-AVX512BW-LABEL: combine_vpermt2var_8i64_identity_mask:
@@ -269,8 +264,7 @@ define <8 x i64> @combine_vpermt2var_8i64_identity_mask(<8 x i64> %x0, <8 x i64>
; X86-AVX512BW-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-AVX512BW-NEXT: kmovd %eax, %k1
; X86-AVX512BW-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
-; X86-AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm1 = [7,14,5,12,3,10,1,8]
-; X86-AVX512BW-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X86-AVX512BW-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z}
; X86-AVX512BW-NEXT: retl
;
; X64-AVX512F-LABEL: combine_vpermt2var_8i64_identity_mask:
@@ -278,8 +272,7 @@ define <8 x i64> @combine_vpermt2var_8i64_identity_mask(<8 x i64> %x0, <8 x i64>
; X64-AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm1 = [7,6,5,4,3,2,1,0]
; X64-AVX512F-NEXT: kmovw %edi, %k1
; X64-AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
-; X64-AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm1 = [7,14,5,12,3,10,1,8]
-; X64-AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X64-AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z}
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: combine_vpermt2var_8i64_identity_mask:
@@ -287,8 +280,7 @@ define <8 x i64> @combine_vpermt2var_8i64_identity_mask(<8 x i64> %x0, <8 x i64>
; X64-AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm1 = [7,6,5,4,3,2,1,0]
; X64-AVX512BW-NEXT: kmovd %edi, %k1
; X64-AVX512BW-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
-; X64-AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm1 = [7,14,5,12,3,10,1,8]
-; X64-AVX512BW-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X64-AVX512BW-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z}
; X64-AVX512BW-NEXT: retq
%res0 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> %x0, <8 x i64> %x1, i8 %m)
%res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> <i64 7, i64 14, i64 5, i64 12, i64 3, i64 10, i64 1, i64 8>, <8 x i64> %res0, <8 x i64> %res0, i8 %m)
@@ -309,8 +301,7 @@ define <16 x float> @combine_vpermt2var_16f32_identity_mask(<16 x float> %x0, <1
; X86-NEXT: vmovaps {{.*#+}} zmm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpermt2ps %zmm0, %zmm1, %zmm0 {%k1} {z}
-; X86-NEXT: vmovaps {{.*#+}} zmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
-; X86-NEXT: vpermt2ps %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X86-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-AVX512F-LABEL: combine_vpermt2var_16f32_identity_mask:
@@ -318,8 +309,7 @@ define <16 x float> @combine_vpermt2var_16f32_identity_mask(<16 x float> %x0, <1
; X64-AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X64-AVX512F-NEXT: kmovw %edi, %k1
; X64-AVX512F-NEXT: vpermt2ps %zmm0, %zmm1, %zmm0 {%k1} {z}
-; X64-AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
-; X64-AVX512F-NEXT: vpermt2ps %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X64-AVX512F-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z}
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: combine_vpermt2var_16f32_identity_mask:
@@ -327,8 +317,7 @@ define <16 x float> @combine_vpermt2var_16f32_identity_mask(<16 x float> %x0, <1
; X64-AVX512BW-NEXT: vmovaps {{.*#+}} zmm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X64-AVX512BW-NEXT: kmovd %edi, %k1
; X64-AVX512BW-NEXT: vpermt2ps %zmm0, %zmm1, %zmm0 {%k1} {z}
-; X64-AVX512BW-NEXT: vmovaps {{.*#+}} zmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
-; X64-AVX512BW-NEXT: vpermt2ps %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X64-AVX512BW-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z}
; X64-AVX512BW-NEXT: retq
%res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x float> %x0, <16 x float> %x1, i16 %m)
%res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 15, i32 30, i32 13, i32 28, i32 11, i32 26, i32 9, i32 24, i32 7, i32 22, i32 5, i32 20, i32 3, i32 18, i32 1, i32 16>, <16 x float> %res0, <16 x float> %res0, i16 %m)
@@ -598,8 +587,7 @@ define <16 x i32> @combine_vpermt2var_16i32_identity_mask(<16 x i32> %x0, <16 x
; X86-NEXT: vpmovsxbd {{.*#+}} zmm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpermt2d %zmm0, %zmm1, %zmm0 {%k1} {z}
-; X86-NEXT: vpmovsxbd {{.*#+}} zmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
-; X86-NEXT: vpermt2d %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X86-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-AVX512F-LABEL: combine_vpermt2var_16i32_identity_mask:
@@ -607,8 +595,7 @@ define <16 x i32> @combine_vpermt2var_16i32_identity_mask(<16 x i32> %x0, <16 x
; X64-AVX512F-NEXT: vpmovsxbd {{.*#+}} zmm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X64-AVX512F-NEXT: kmovw %edi, %k1
; X64-AVX512F-NEXT: vpermt2d %zmm0, %zmm1, %zmm0 {%k1} {z}
-; X64-AVX512F-NEXT: vpmovsxbd {{.*#+}} zmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
-; X64-AVX512F-NEXT: vpermt2d %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X64-AVX512F-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z}
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: combine_vpermt2var_16i32_identity_mask:
@@ -616,8 +603,7 @@ define <16 x i32> @combine_vpermt2var_16i32_identity_mask(<16 x i32> %x0, <16 x
; X64-AVX512BW-NEXT: vpmovsxbd {{.*#+}} zmm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X64-AVX512BW-NEXT: kmovd %edi, %k1
; X64-AVX512BW-NEXT: vpermt2d %zmm0, %zmm1, %zmm0 {%k1} {z}
-; X64-AVX512BW-NEXT: vpmovsxbd {{.*#+}} zmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
-; X64-AVX512BW-NEXT: vpermt2d %zmm0, %zmm1, %zmm0 {%k1} {z}
+; X64-AVX512BW-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z}
; X64-AVX512BW-NEXT: retq
%res0 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x i32> %x0, <16 x i32> %x1, i16 %m)
%res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 15, i32 30, i32 13, i32 28, i32 11, i32 26, i32 9, i32 24, i32 7, i32 22, i32 5, i32 20, i32 3, i32 18, i32 1, i32 16>, <16 x i32> %res0, <16 x i32> %res0, i16 %m)
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
index 04cfa3cedd58fd..ccc78823623df6 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
@@ -32,8 +32,7 @@ define <16 x i8> @combine_vpermt2var_16i8_identity_mask(<16 x i8> %x0, <16 x i8>
; X86-NEXT: vmovdqa {{.*#+}} xmm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpermt2b %xmm0, %xmm1, %xmm0 {%k1} {z}
-; X86-NEXT: vmovdqa {{.*#+}} xmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
-; X86-NEXT: vpermt2b %xmm0, %xmm1, %xmm0 {%k1} {z}
+; X86-NEXT: vpermb %xmm0, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: combine_vpermt2var_16i8_identity_mask:
@@ -41,8 +40,7 @@ define <16 x i8> @combine_vpermt2var_16i8_identity_mask(<16 x i8> %x0, <16 x i8>
; X64-NEXT: vmovdqa {{.*#+}} xmm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpermt2b %xmm0, %xmm1, %xmm0 {%k1} {z}
-; X64-NEXT: vmovdqa {{.*#+}} xmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
-; X64-NEXT: vpermt2b %xmm0, %xmm1, %xmm0 {%k1} {z}
+; X64-NEXT: vpermb %xmm0, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT: retq
%res0 = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8> %x0, <16 x i8> %x1, i16 %m)
%res1 = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> <i8 15, i8 30, i8 13, i8 28, i8 11, i8 26, i8 9, i8 24, i8 7, i8 22, i8 5, i8 20, i8 3, i8 18, i8 1, i8 16>, <16 x i8> %res0, <16 x i8> %res0, i16 %m)