[PATCH] D20443: [PowerPC] - Combine loads of v4i8 to loads of i32 followed by bitcast

Nemanja Ivanovic via llvm-commits llvm-commits at lists.llvm.org
Thu May 19 11:50:33 PDT 2016


nemanjai updated this revision to Diff 57832.
nemanjai added a comment.

Added the test case that I forgot to include in the initial patch.


Repository:
  rL LLVM

http://reviews.llvm.org/D20443

Files:
  lib/Target/PowerPC/PPCISelLowering.cpp
  test/CodeGen/PowerPC/load-v4i8-improved.ll

Index: test/CodeGen/PowerPC/load-v4i8-improved.ll
===================================================================
--- test/CodeGen/PowerPC/load-v4i8-improved.ll
+++ test/CodeGen/PowerPC/load-v4i8-improved.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s \
+; RUN:   --check-prefix=CHECK-BE
+
+define <16 x i8> @test(i32* %s, i32* %t) {
+entry:
+  %0 = bitcast i32* %s to <4 x i8>*
+  %1 = load <4 x i8>, <4 x i8>* %0, align 4
+  %2 = shufflevector <4 x i8> %1, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  ret <16 x i8> %2
+; CHECK: lwz [[GPR:[0-9]+]], 0(3)
+; CHECK: mtvsrd [[VSR:[0-9]+]], [[GPR]]
+; CHECK: xxswapd  [[SWP:[0-9]+]], [[VSR]]
+; CHECK: xxspltw 34, [[SWP]], 3
+; CHECK-NOT: vmrg
+; CHECK-NOT: vperm
+; CHECK-BE: lwz [[GPR:[0-9]+]], 0(3)
+; CHECK-BE: sldi [[SHL:[0-9]+]], [[GPR]], 32
+; CHECK-BE: mtvsrd [[VSR:[0-9]+]], [[SHL]]
+; CHECK-BE: xxspltw 34, [[VSR]], 0
+; CHECK-BE-NOT: vmrg
+; CHECK-BE-NOT: vperm
+}
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10566,6 +10566,20 @@
           (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
            LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
         return expandVSXLoadForLE(N, DCI);
+
+      // Directly loading a v4i8 tends to yield poor code. Instead, emit an
+      // i32 load and bitcast the result back to v4i8.
+      if (LoadVT == MVT::v4i8) {
+        SDValue ScalarLoad = DAG.getLoad(MVT::i32, dl, LD->getChain(),
+                                         LD->getBasePtr(), LD->getPointerInfo(),
+                                         false, LD->isNonTemporal(),
+                                         LD->isInvariant(), LD->getAlignment(),
+                                         LD->getAAInfo());
+        SDValue BitCast = DAG.getBitcast(MVT::v4i8, ScalarLoad);
+        return DAG.getNode(ISD::MERGE_VALUES, dl,
+                           DAG.getVTList(MVT::v4i8, MVT::Other),
+                           BitCast, ScalarLoad.getValue(1));
+      }
     }
 
     // We sometimes end up with a 64-bit integer load, from which we extract


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D20443.57832.patch
Type: text/x-patch
Size: 2410 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160519/18e75c05/attachment.bin>


More information about the llvm-commits mailing list