[llvm] r320930 - [X86][AVX] Fix failed broadcast fold

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Dec 16 14:57:17 PST 2017


Author: rksimon
Date: Sat Dec 16 14:57:17 2017
New Revision: 320930

URL: http://llvm.org/viewvc/llvm-project?rev=320930&view=rev
Log:
[X86][AVX] Fix failed broadcast fold

Strip excess BITCASTs from EXTRACT_SUBVECTOR input

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/widened-broadcast.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=320930&r1=320929&r2=320930&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Dec 16 14:57:17 2017
@@ -10483,9 +10483,13 @@ static SDValue lowerVectorShuffleAsBroad
 
   // We only support broadcasting from 128-bit vectors to minimize the
   // number of patterns we need to deal with in isel. So extract down to
-  // 128-bits.
-  if (SrcVT.getSizeInBits() > 128)
-    V = extract128BitVector(V, 0, DAG, DL);
+  // 128-bits, removing as many bitcasts as possible.
+  if (SrcVT.getSizeInBits() > 128) {
+    MVT ExtVT = MVT::getVectorVT(SrcVT.getScalarType(),
+                                 128 / SrcVT.getScalarSizeInBits());
+    V = extract128BitVector(peekThroughBitcasts(V), 0, DAG, DL);
+    V = DAG.getBitcast(ExtVT, V);
+  }
 
   return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
 }

Modified: llvm/trunk/test/CodeGen/X86/widened-broadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widened-broadcast.ll?rev=320930&r1=320929&r2=320930&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widened-broadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widened-broadcast.ll Sat Dec 16 14:57:17 2017
@@ -526,22 +526,10 @@ define <32 x i8> @load_splat_32i8_32i8_0
 ; SSE-NEXT:    movdqa %xmm0, %xmm1
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
-; AVX1:       # %bb.0: # %entry
-; AVX1-NEXT:    vbroadcastss (%rdi), %ymm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
-; AVX2:       # %bb.0: # %entry
-; AVX2-NEXT:    vmovaps (%rdi), %ymm0
-; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
-; AVX2-NEXT:    retq
-;
-; AVX512-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
-; AVX512:       # %bb.0: # %entry
-; AVX512-NEXT:    vmovaps (%rdi), %ymm0
-; AVX512-NEXT:    vbroadcastss %xmm0, %ymm0
-; AVX512-NEXT:    retq
+; AVX-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vbroadcastss (%rdi), %ymm0
+; AVX-NEXT:    retq
 entry:
   %ld = load <32 x i8>, <32 x i8>* %ptr
   %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>




More information about the llvm-commits mailing list