[llvm] 11ff517 - [X86][AVX] lowerShuffleWithVPMOV - add non-VLX support.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 18 09:52:22 PDT 2020
Author: Simon Pilgrim
Date: 2020-08-18T17:51:14+01:00
New Revision: 11ff5176c4655526960dd01024f5d1f99499d4ad
URL: https://github.com/llvm/llvm-project/commit/11ff5176c4655526960dd01024f5d1f99499d4ad
DIFF: https://github.com/llvm/llvm-project/commit/11ff5176c4655526960dd01024f5d1f99499d4ad.diff
LOG: [X86][AVX] lowerShuffleWithVPMOV - add non-VLX support.
We can efficiently handle non-VLX cases now that we have the getAVX512TruncNode helper.
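For context, without AVX512VL the 128/256-bit vpmov* truncating forms are unavailable, so the idea is to widen the source into a zero-padded 512-bit vector, use the zmm-sourced truncation, and keep the low 128-bit result. A minimal SelectionDAG-style sketch of that shape (illustrative only -- the actual logic lives in the getAVX512TruncNode helper in X86ISelLowering.cpp, and the function and variable names below are made up):

static SDValue sketchNonVLXTruncate(const SDLoc &DL, MVT DstVT, SDValue Src,
                                    SelectionDAG &DAG) {
  MVT SrcVT = Src.getSimpleValueType();
  MVT SrcSVT = SrcVT.getScalarType();

  // Widen the 128/256-bit source to 512 bits; the non-VLX vpmov* forms only
  // accept zmm sources. Using zero (rather than undef) upper lanes guarantees
  // the truncated padding elements are zero as well.
  unsigned NumWideElts = 512 / SrcVT.getScalarSizeInBits();
  MVT WideVT = MVT::getVectorVT(SrcSVT, NumWideElts);
  SDValue Wide = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT,
                             DAG.getConstant(0, DL, WideVT), Src,
                             DAG.getIntPtrConstant(0, DL));

  // Truncate the widened vector. The element count matches the widened
  // source, so the result is at least 128 bits wide.
  MVT TruncVT = MVT::getVectorVT(DstVT.getScalarType(), NumWideElts);
  SDValue Trunc = DAG.getNode(X86ISD::VTRUNC, DL, TruncVT, Wide);

  // If the truncated type is wider than the requested result, keep only the
  // low DstVT-sized subvector.
  if (TruncVT != DstVT)
    Trunc = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, DstVT, Trunc,
                        DAG.getIntPtrConstant(0, DL));
  return Trunc;
}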
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ec4d236dc3ea..2b19254c4344 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -11361,14 +11361,8 @@ static bool matchShuffleAsVPMOV(ArrayRef<int> Mask, bool SwappedOps,
// t51: v8i16 = vector_shuffle<0,2,4,6,12,13,14,15> t41, t21
// t18: v2i64 = bitcast t51
//
-// Without avx512vl, this is lowered to:
-//
-// vpmovqd %zmm0, %ymm0
-// vpshufb {{.*#+}} xmm0 =
-// xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
-//
-// But when avx512vl is available, one can just use a single vpmovdw
-// instruction.
+// With avx512vl we can just use a single vpmovdw instruction; without it we
+// need the zmm variant, extracting the lower subvector and padding with zeroes.
// TODO: Merge with lowerShuffleAsVTRUNC.
static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
@@ -11400,11 +11394,6 @@ static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, MVT VT, SDValue V1,
SDValue Src = V1.getOperand(0).getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
- // The vptrunc** instructions truncating 128 bit and 256 bit vectors
- // are only available with avx512vl.
- if (!SrcVT.is512BitVector() && !Subtarget.hasVLX())
- return SDValue();
-
// Down Convert Word to Byte is only available with avx512bw. The case with
// 256-bit output doesn't contain a shuffle and is therefore not handled here.
if (SrcVT.getVectorElementType() == MVT::i16 && VT == MVT::v16i8 &&
@@ -11417,7 +11406,7 @@ static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, MVT VT, SDValue V1,
!matchShuffleAsVPMOV(Mask, SwappedOps, 4))
return SDValue();
- return DAG.getNode(X86ISD::VTRUNC, DL, VT, Src);
+ return getAVX512TruncNode(DL, VT, Src, Subtarget, DAG, true);
}
// Attempt to match binary shuffle patterns as a truncate.
diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll
index 1559fdbbe72c..3919f326d39a 100644
--- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll
+++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll
@@ -561,9 +561,8 @@ define <16 x i8> @trunc_v8i32_to_v8i8_with_zext_return_v16i8(<8 x i32> %vec) nou
;
; AVX512F-LABEL: trunc_v8i32_to_v8i8_with_zext_return_v16i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512F-NEXT: vmovdqa %ymm0, %ymm0
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
@@ -575,9 +574,8 @@ define <16 x i8> @trunc_v8i32_to_v8i8_with_zext_return_v16i8(<8 x i32> %vec) nou
;
; AVX512BW-LABEL: trunc_v8i32_to_v8i8_with_zext_return_v16i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vmovdqa %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
@@ -621,9 +619,8 @@ define <16 x i8> @trunc_v8i32_to_v8i8_via_v8i16_return_v16i8(<8 x i32> %vec) nou
;
; AVX512F-LABEL: trunc_v8i32_to_v8i8_via_v8i16_return_v16i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512F-NEXT: vmovdqa %ymm0, %ymm0
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
@@ -635,9 +632,8 @@ define <16 x i8> @trunc_v8i32_to_v8i8_via_v8i16_return_v16i8(<8 x i32> %vec) nou
;
; AVX512BW-LABEL: trunc_v8i32_to_v8i8_via_v8i16_return_v16i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vmovdqa %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
@@ -816,9 +812,8 @@ define <8 x i16> @trunc_v4i64_to_v4i16_with_zext_return_v8i16(<4 x i64> %vec) no
;
; AVX512F-LABEL: trunc_v4i64_to_v4i16_with_zext_return_v8i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512F-NEXT: vmovdqa %ymm0, %ymm0
+; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
@@ -830,9 +825,8 @@ define <8 x i16> @trunc_v4i64_to_v4i16_with_zext_return_v8i16(<4 x i64> %vec) no
;
; AVX512BW-LABEL: trunc_v4i64_to_v4i16_with_zext_return_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vmovdqa %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
@@ -881,9 +875,8 @@ define <8 x i16> @trunc_v4i64_to_v4i16_via_v4i32_return_v8i16(<4 x i64> %vec) no
;
; AVX512F-LABEL: trunc_v4i64_to_v4i16_via_v4i32_return_v8i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512F-NEXT: vmovdqa %ymm0, %ymm0
+; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
@@ -895,9 +888,8 @@ define <8 x i16> @trunc_v4i64_to_v4i16_via_v4i32_return_v8i16(<4 x i64> %vec) no
;
; AVX512BW-LABEL: trunc_v4i64_to_v4i16_via_v4i32_return_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vmovdqa %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
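As a user-level illustration of the new AVX512F-only output (not part of the patch): the vmovdqa+vpmovdb sequence above amounts to zero-extending the 256-bit source into a zmm register and then truncating from the full zmm source, which can be written directly with AVX-512F intrinsics. A rough sketch, assuming the standard immintrin.h intrinsic names:

#include <immintrin.h>

// Roughly the shape of the new codegen for
// trunc_v8i32_to_v8i8_with_zext_return_v16i8: zero the upper 256 bits of the
// zmm register, then truncate dwords to bytes from the full zmm source.
__m128i trunc_v8i32_to_v8i8_zext_v16i8(__m256i v) {
  __m512i wide = _mm512_zextsi256_si512(v); // upper eight dwords are zero
  return _mm512_cvtepi32_epi8(wide);        // vpmovdb zmm -> xmm
}

Because the upper half of the zmm source is zero, the low vpmovdb result is already zero-padded, so the previous vpshufb zero-fill is no longer needed.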