[llvm] r257055 - [X86][AVX] Match broadcast loads through a bitcast

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 7 03:34:27 PST 2016


Author: rksimon
Date: Thu Jan  7 05:34:27 2016
New Revision: 257055

URL: http://llvm.org/viewvc/llvm-project?rev=257055&view=rev
Log:
[X86][AVX] Match broadcast loads through a bitcast

AVX1 v8i32/v4i64 shuffles are bitcast to v8f32/v4f64; this patch peeks through those bitcasts to check for a load node, allowing broadcast loads to be matched.

Follow-up to D15310.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/2012-01-12-extract-sv.ll
    llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=257055&r1=257054&r2=257055&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Jan  7 05:34:27 2016
@@ -8163,6 +8163,11 @@ static SDValue lowerVectorShuffleAsBroad
     break;
   }
 
+  // Peek through any bitcast (only useful for loads).
+  SDValue BC = V;
+  while (BC.getOpcode() == ISD::BITCAST)
+    BC = BC.getOperand(0);
+
   // Check if this is a broadcast of a scalar. We special case lowering
   // for scalars so that we can more effectively fold with loads.
   // First, look through bitcast: if the original value has a larger element
@@ -8182,10 +8187,10 @@ static SDValue lowerVectorShuffleAsBroad
     // Only AVX2 has register broadcasts.
     if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V))
       return SDValue();
-  } else if (MayFoldLoad(V) && !cast<LoadSDNode>(V)->isVolatile()) {
+  } else if (MayFoldLoad(BC) && !cast<LoadSDNode>(BC)->isVolatile()) {
     // If we are broadcasting a load that is only used by the shuffle
     // then we can reduce the vector load to the broadcasted scalar load.
-    LoadSDNode *Ld = cast<LoadSDNode>(V);
+    LoadSDNode *Ld = cast<LoadSDNode>(BC);
     SDValue BaseAddr = Ld->getOperand(1);
     EVT AddrVT = BaseAddr.getValueType();
     EVT SVT = VT.getScalarType();

Modified: llvm/trunk/test/CodeGen/X86/2012-01-12-extract-sv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2012-01-12-extract-sv.ll?rev=257055&r1=257054&r2=257055&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2012-01-12-extract-sv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2012-01-12-extract-sv.ll Thu Jan  7 05:34:27 2016
@@ -3,9 +3,7 @@
 define void @endless_loop() {
 ; CHECK-LABEL: endless_loop:
 ; CHECK-NEXT:  # BB#0:
-; CHECK-NEXT:    vmovaps (%eax), %ymm0
-; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; CHECK-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; CHECK-NEXT:    vbroadcastss (%eax), %ymm0
 ; CHECK-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
 ; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2

Modified: llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll?rev=257055&r1=257054&r2=257055&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll Thu Jan  7 05:34:27 2016
@@ -130,10 +130,7 @@ entry:
 define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
 ; CHECK-LABEL: load_splat_8i32_8i32_55555555:
 ; CHECK:       ## BB#0: ## %entry
-; CHECK-NEXT:    vmovaps (%rdi), %ymm0
-; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT:    vbroadcastss 20(%rdi), %ymm0
 ; CHECK-NEXT:    retq
 entry:
   %ld = load <8 x i32>, <8 x i32>* %ptr
@@ -201,10 +198,7 @@ entry:
 define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
 ; CHECK-LABEL: load_splat_4i64_4i64_2222:
 ; CHECK:       ## BB#0: ## %entry
-; CHECK-NEXT:    vmovapd (%rdi), %ymm0
-; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT:    vbroadcastsd 16(%rdi), %ymm0
 ; CHECK-NEXT:    retq
 entry:
   %ld = load <4 x i64>, <4 x i64>* %ptr




More information about the llvm-commits mailing list