[llvm] b61cef3 - [X86][AVX] getAVX512TruncNode - don't truncate from illegal vector widths.

Wed Aug 19 05:06:36 PDT 2020

Author: Simon Pilgrim
Date: 2020-08-19T13:00:26+01:00
New Revision: b61cef3a921bb21ca0e2dc4b1f079a8f1a91d65e

URL: https://github.com/llvm/llvm-project/commit/b61cef3a921bb21ca0e2dc4b1f079a8f1a91d65e
DIFF: https://github.com/llvm/llvm-project/commit/b61cef3a921bb21ca0e2dc4b1f079a8f1a91d65e.diff

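LOG: [X86][AVX] getAVX512TruncNode - don't truncate from illegal vector widths.

getAVX512TruncNode now returns an empty SDValue when the source vector type is not legal for the target, before it attempts to build the truncation. The new test exercises this with a <64 x i8> load compiled with -mattr=prefer-256-bit.

Thanks to @fhahn for the test case.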

Added: 
    llvm/test/CodeGen/X86/trunc-vector-width.ll

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ada2242b0c7b..7ffb8791a2d1 100644

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -11297,6 +11297,9 @@ static SDValue getAVX512TruncNode(const SDLoc &DL, MVT DstVT, SDValue Src,
   unsigned NumSrcElts = SrcVT.getVectorNumElements();
   unsigned DstEltSizeInBits = DstVT.getScalarSizeInBits();
 
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT))
+    return SDValue();
+
   // Perform a direct ISD::TRUNCATE if possible.
   if (NumSrcElts == NumDstElts)
     return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Src);

diff  --git a/llvm/test/CodeGen/X86/trunc-vector-width.ll b/llvm/test/CodeGen/X86/trunc-vector-width.ll
new file mode 100644
index 000000000000..ef60aa6ffbea
--- /dev/null
+++ b/llvm/test/CodeGen/X86/trunc-vector-width.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -mattr=prefer-256-bit | FileCheck %s
+
+define void @test(<64 x i8>* %a0) #0 {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovdqu (%rdi), %xmm0
+; CHECK-NEXT:    vpblendd {{.*#+}} xmm0 = mem[0],xmm0[1,2,3]
+; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,4,5,5,0,0,1,1,u,u,u,u,u,u,u,u]
+; CHECK-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vpextrb $1, %xmm0, (%rax)
+; CHECK-NEXT:    vpextrb $4, %xmm0, (%rax)
+; CHECK-NEXT:    vpextrb $8, %xmm0, (%rax)
+; CHECK-NEXT:    retq
+  %load = load <64 x i8>, <64 x i8>* %a0, align 1
+  %shuf = shufflevector <64 x i8> %load, <64 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60>
+  %xor = xor <16 x i8> %shuf, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %i1 = extractelement <16 x i8> %xor, i32 1
+  %i2 = extractelement <16 x i8> %xor, i32 4
+  %i3 = extractelement <16 x i8> %xor, i32 8
+  store i8 %i1, i8* undef, align 1
+  store i8 %i2, i8* undef, align 1
+  store i8 %i3, i8* undef, align 1
+  ret void
+}
+
+attributes #0 = { "min-legal-vector-width"="0" "target-cpu"="skylake-avx512" }