[llvm] r320930 - [X86][AVX] Fix failed broadcast fold
    Simon Pilgrim via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Sat Dec 16 14:57:17 PST 2017
    
    
  
Author: rksimon
Date: Sat Dec 16 14:57:17 2017
New Revision: 320930
URL: http://llvm.org/viewvc/llvm-project?rev=320930&view=rev
Log:
[X86][AVX] Fix failed broadcast fold
Strip excess BITCASTs from EXTRACT_SUBVECTOR input
Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/widened-broadcast.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=320930&r1=320929&r2=320930&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Dec 16 14:57:17 2017
@@ -10483,9 +10483,13 @@ static SDValue lowerVectorShuffleAsBroad
 
   // We only support broadcasting from 128-bit vectors to minimize the
   // number of patterns we need to deal with in isel. So extract down to
-  // 128-bits.
-  if (SrcVT.getSizeInBits() > 128)
-    V = extract128BitVector(V, 0, DAG, DL);
+  // 128-bits, removing as many bitcasts as possible.
+  if (SrcVT.getSizeInBits() > 128) {
+    MVT ExtVT = MVT::getVectorVT(SrcVT.getScalarType(),
+                                 128 / SrcVT.getScalarSizeInBits());
+    V = extract128BitVector(peekThroughBitcasts(V), 0, DAG, DL);
+    V = DAG.getBitcast(ExtVT, V);
+  }
 
   return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
 }
Modified: llvm/trunk/test/CodeGen/X86/widened-broadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widened-broadcast.ll?rev=320930&r1=320929&r2=320930&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widened-broadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widened-broadcast.ll Sat Dec 16 14:57:17 2017
@@ -526,22 +526,10 @@ define <32 x i8> @load_splat_32i8_32i8_0
 ; SSE-NEXT:    movdqa %xmm0, %xmm1
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
-; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    vbroadcastss (%rdi), %ymm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
-; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    vmovaps (%rdi), %ymm0
-; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
-; AVX2-NEXT:    retq
-;
-; AVX512-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
-; AVX512:       # %bb.0: # %entry
-; AVX512-NEXT:    vmovaps (%rdi), %ymm0
-; AVX512-NEXT:    vbroadcastss %xmm0, %ymm0
-; AVX512-NEXT:    retq
+; AVX-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vbroadcastss (%rdi), %ymm0
+; AVX-NEXT:    retq
 entry:
   %ld = load <32 x i8>, <32 x i8>* %ptr
   %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
    
    
More information about the llvm-commits
mailing list