[llvm] d9d28b3 - [X86][AVX] getFauxShuffleMask - fix sub vector size check in INSERT_SUBVECTOR(X,SHUFFLE(Y,Z))
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 3 07:26:45 PDT 2020
Author: Simon Pilgrim
Date: 2020-06-03T15:26:22+01:00
New Revision: d9d28b35599cfe39331ae37b74a21ccfb5f78af0
URL: https://github.com/llvm/llvm-project/commit/d9d28b35599cfe39331ae37b74a21ccfb5f78af0
DIFF: https://github.com/llvm/llvm-project/commit/d9d28b35599cfe39331ae37b74a21ccfb5f78af0.diff
LOG: [X86][AVX] getFauxShuffleMask - fix sub vector size check in INSERT_SUBVECTOR(X,SHUFFLE(Y,Z))
The size check was inverted: we were bailing out on subvector shuffle inputs that were smaller than the subvector type, instead of on inputs that were larger than it.
Fixes PR46178
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5cf1a9450ec6..444454aab40b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7439,8 +7439,8 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
return false;
// Subvector shuffle inputs must not be larger than the subvector.
- if (llvm::any_of(SubInputs, [SubVT](SDValue Op) {
- return SubVT.getSizeInBits() > Op.getValueSizeInBits();
+ if (llvm::any_of(SubInputs, [SubVT](SDValue SubInput) {
+ return SubVT.getSizeInBits() < SubInput.getValueSizeInBits();
}))
return false;
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
index e42691df9ac4..0c72e351706b 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
@@ -99,3 +99,57 @@ define <16 x i8> @combine_shuffle_vrotli_v4i32(<4 x i32> %a0) {
ret <16 x i8> %3
}
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+
+define void @PR46178(i16* %0) {
+; X86-LABEL: PR46178:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: vmovdqu 0, %ymm0
+; X86-NEXT: vmovdqu (%eax), %ymm1
+; X86-NEXT: vpmovqw %ymm0, %xmm0
+; X86-NEXT: vpmovqw %ymm1, %xmm1
+; X86-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; X86-NEXT: vpsllw $8, %ymm0, %ymm0
+; X86-NEXT: vpsraw $8, %ymm0, %ymm0
+; X86-NEXT: vmovapd {{.*#+}} ymm1 = [0,0,2,0,4,0,4,0]
+; X86-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; X86-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1
+; X86-NEXT: vmovupd %ymm1, (%eax)
+; X86-NEXT: vzeroupper
+; X86-NEXT: retl
+;
+; X64-LABEL: PR46178:
+; X64: # %bb.0:
+; X64-NEXT: vmovdqu 0, %ymm0
+; X64-NEXT: vmovdqu (%rax), %ymm1
+; X64-NEXT: vpmovqw %ymm0, %xmm0
+; X64-NEXT: vpmovqw %ymm1, %xmm1
+; X64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; X64-NEXT: vpsllw $8, %ymm0, %ymm0
+; X64-NEXT: vpsraw $8, %ymm0, %ymm0
+; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; X64-NEXT: vmovdqa %xmm0, %xmm0
+; X64-NEXT: vmovdqu %ymm0, (%rdi)
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+ %2 = load <4 x i64>, <4 x i64>* null, align 8
+ %3 = load <4 x i64>, <4 x i64>* undef, align 8
+ %4 = trunc <4 x i64> %2 to <4 x i16>
+ %5 = trunc <4 x i64> %3 to <4 x i16>
+ %6 = shl <4 x i16> %4, <i16 8, i16 8, i16 8, i16 8>
+ %7 = shl <4 x i16> %5, <i16 8, i16 8, i16 8, i16 8>
+ %8 = ashr exact <4 x i16> %6, <i16 8, i16 8, i16 8, i16 8>
+ %9 = ashr exact <4 x i16> %7, <i16 8, i16 8, i16 8, i16 8>
+ %10 = bitcast i16* %0 to <4 x i16>*
+ %11 = getelementptr inbounds i16, i16* %0, i64 4
+ %12 = bitcast i16* %11 to <4 x i16>*
+ %13 = getelementptr inbounds i16, i16* %0, i64 8
+ %14 = bitcast i16* %13 to <4 x i16>*
+ %15 = getelementptr inbounds i16, i16* %0, i64 12
+ %16 = bitcast i16* %15 to <4 x i16>*
+ store <4 x i16> %8, <4 x i16>* %10, align 2
+ store <4 x i16> %9, <4 x i16>* %12, align 2
+ store <4 x i16> zeroinitializer, <4 x i16>* %14, align 2
+ store <4 x i16> zeroinitializer, <4 x i16>* %16, align 2
+ ret void
+}
More information about the llvm-commits mailing list