[llvm] 5f9f37c - [X86][AVX] Don't let X86ISD::BROADCAST peek through bitcasts to illegal types.

Fri May 8 04:31:48 PDT 2020

Author: Simon Pilgrim
Date: 2020-05-08T12:30:50+01:00
New Revision: 5f9f37c42a346e79a530ca1b12ec435b30d3c829

URL: https://github.com/llvm/llvm-project/commit/5f9f37c42a346e79a530ca1b12ec435b30d3c829
DIFF: https://github.com/llvm/llvm-project/commit/5f9f37c42a346e79a530ca1b12ec435b30d3c829.diff

LOG: [X86][AVX] Don't let X86ISD::BROADCAST peek through bitcasts to illegal types.

This was an existing bug, exposed by the more aggressive X86ISD::BROADCAST generation introduced in rG8817334ce3c7.

Original test case thanks to @mstorsjo

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index cb83d7ef6cea..00a395921d3b 100644

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35565,7 +35565,8 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
     // broadcast(bitcast(src)) -> bitcast(broadcast(src))
     // 32-bit targets have to bitcast i64 to f64, so better to bitcast upward.
     if (Src.getOpcode() == ISD::BITCAST &&
-        SrcVT.getScalarSizeInBits() == BCVT.getScalarSizeInBits()) {
+        SrcVT.getScalarSizeInBits() == BCVT.getScalarSizeInBits() &&
+        DAG.getTargetLoweringInfo().isTypeLegal(BCVT)) {
       EVT NewVT = EVT::getVectorVT(*DAG.getContext(), BCVT.getScalarType(),
                                    VT.getVectorNumElements());
       return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC));

diff  --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
index bbd5bcb29a98..076fed814478 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -778,6 +778,32 @@ define <32 x i8> @constant_fold_pshufb_256() {
   ret <32 x i8> %1
 }
 
+define i32 @broadcast_v2i64_multiuse(i64* %p0) {
+; X86-LABEL: broadcast_v2i64_multiuse:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X86-NEXT:    vextractps $2, %xmm0, %eax
+; X86-NEXT:    addl (%ecx), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: broadcast_v2i64_multiuse:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    addl %eax, %eax
+; X64-NEXT:    retq
+entry:
+  %tmp = load i64, i64* %p0, align 8
+  %tmp1 = trunc i64 %tmp to i32
+  %tmp2 = insertelement <2 x i64> undef, i64 %tmp, i32 0
+  %tmp3 = shufflevector <2 x i64> %tmp2, <2 x i64> undef, <2 x i32> zeroinitializer
+  %tmp4 = trunc <2 x i64> %tmp3 to <2 x i32>
+  %tmp5 = extractelement <2 x i32> %tmp4, i32 1
+  %tmp6 = add i32 %tmp1, %tmp5
+  ret i32 %tmp6
+}
+
 define <32 x i8> @PR27320(<8 x i32> %a0) {
 ; CHECK-LABEL: PR27320:
 ; CHECK:       # %bb.0: