[PATCH] D20443: [PowerPC] - Combine loads of v4i8 to loads of i32 followed by bitcast
Nemanja Ivanovic via llvm-commits
llvm-commits at lists.llvm.org
Thu May 19 11:50:33 PDT 2016
nemanjai updated this revision to Diff 57832.
nemanjai added a comment.
Added the test case that I forgot in the initial patch.
Repository:
rL LLVM
http://reviews.llvm.org/D20443
Files:
lib/Target/PowerPC/PPCISelLowering.cpp
test/CodeGen/PowerPC/load-v4i8-improved.ll
Index: test/CodeGen/PowerPC/load-v4i8-improved.ll
===================================================================
--- test/CodeGen/PowerPC/load-v4i8-improved.ll
+++ test/CodeGen/PowerPC/load-v4i8-improved.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s \
+; RUN: --check-prefix=CHECK-BE
+
+define <16 x i8> @test(i32* %s, i32* %t) {
+entry:
+ %0 = bitcast i32* %s to <4 x i8>*
+ %1 = load <4 x i8>, <4 x i8>* %0, align 4
+ %2 = shufflevector <4 x i8> %1, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ ret <16 x i8> %2
+; CHECK: lwz [[GPR:[0-9]+]], 0(3)
+; CHECK: mtvsrd [[VSR:[0-9]+]], [[GPR]]
+; CHECK: xxswapd [[SWP:[0-9]+]], [[VSR]]
+; CHECK: xxspltw 34, [[SWP]], 3
+; CHECK-NOT: vmrg
+; CHECK-NOT: vperm
+; CHECK-BE: lwz [[GPR:[0-9]+]], 0(3)
+; CHECK-BE: sldi [[SHL:[0-9]+]], [[GPR]], 32
+; CHECK-BE: mtvsrd [[VSR:[0-9]+]], [[SHL]]
+; CHECK-BE: xxspltw 34, [[VSR]], 0
+; CHECK-BE-NOT: vmrg
+; CHECK-BE-NOT: vperm
+}
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10566,6 +10566,20 @@
(LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
return expandVSXLoadForLE(N, DCI);
+
+ // A v4i8 load lowered as a vector can degrade into vmrg/vperm sequences.
+ // Load the four bytes as a single i32 and bitcast the result to v4i8.
+ if (LoadVT == MVT::v4i8) {
+ SDValue ScalarLoad = DAG.getLoad(MVT::i32, dl, LD->getChain(),
+ LD->getBasePtr(), LD->getPointerInfo(),
+ false, LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment(),
+ LD->getAAInfo());
+ SDValue BitCast = DAG.getBitcast(MVT::v4i8, ScalarLoad);
+ return DAG.getNode(ISD::MERGE_VALUES, dl,
+ DAG.getVTList(MVT::v4i8, MVT::Other),
+ BitCast, ScalarLoad.getValue(1));
+ }
}
// We sometimes end up with a 64-bit integer load, from which we extract
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D20443.57832.patch
Type: text/x-patch
Size: 2410 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160519/18e75c05/attachment.bin>
More information about the llvm-commits mailing list