[llvm] r265593 - [PPC] Use VSX/FP Facility integer load when an integer load's only users are conversion to FP

Wed Apr 6 13:12:30 PDT 2016

Author: amehsan
Date: Wed Apr  6 15:12:29 2016
New Revision: 265593

URL: http://llvm.org/viewvc/llvm-project?rev=265593&view=rev
Log:
[PPC] Use VSX/FP Facility integer load when an integer load's only users are conversion to FP

http://reviews.llvm.org/D18405

When the integer value loaded is never used directly as integer we should use VSX 
or Floating Point Facility integer loads and avoid extra direct move


Added:
    llvm/trunk/test/CodeGen/PowerPC/direct-move-profit.ll
Modified:
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=265593&r1=265592&r2=265593&view=diff
==============================================================================

--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Wed Apr  6 15:12:29 2016
@@ -6504,6 +6504,30 @@ void PPCTargetLowering::spliceIntoChain(
   DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
 }
 
+/// \brief Analyze profitability of direct move
+/// prefer float load to int load plus direct move
+/// when there is no integer use of int load
+static bool directMoveIsProfitable(const SDValue &Op) {
+  SDNode *Origin = Op.getOperand(0).getNode();
+  if (Origin->getOpcode() != ISD::LOAD)
+    return true;
+
+  for (SDNode::use_iterator UI = Origin->use_begin(),
+                            UE = Origin->use_end();
+       UI != UE; ++UI) {
+
+    // Only look at the users of the loaded value.
+    if (UI.getUse().get().getResNo() != 0)
+      continue;
+
+    if (UI->getOpcode() != ISD::SINT_TO_FP &&
+        UI->getOpcode() != ISD::UINT_TO_FP)
+      return true;
+  }
+
+  return false;
+}
+
 /// \brief Custom lowers integer to floating point conversions to use
 /// the direct move instructions available in ISA 2.07 to avoid the
 /// need for load/store combinations.
@@ -6572,7 +6596,8 @@ SDValue PPCTargetLowering::LowerINT_TO_F
 
   // If we have direct moves, we can do all the conversion, skip the store/load
   // however, without FPCVT we can't do most conversions.
-  if (Subtarget.hasDirectMove() && Subtarget.isPPC64() && Subtarget.hasFPCVT())
+  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
+      Subtarget.isPPC64() && Subtarget.hasFPCVT())
     return LowerINT_TO_FPDirectMove(Op, DAG, dl);
 
   assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&

Added: llvm/trunk/test/CodeGen/PowerPC/direct-move-profit.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/direct-move-profit.ll?rev=265593&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/direct-move-profit.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/direct-move-profit.ll Wed Apr  6 15:12:29 2016
@@ -0,0 +1,83 @@
+; RUN: llc -O2 -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+
+; Function Attrs: norecurse nounwind
+define void @test1(float* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* nocapture readnone %c, i32 signext %n) #0 {
+
+; CHECK-LABEL: test1
+
+entry:
+  %idxprom = sext i32 %n to i64
+  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4, !tbaa !1
+  %conv = sitofp i32 %0 to float
+  %mul = fmul float %conv, 0x4002916880000000
+  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %idxprom
+  store float %mul, float* %arrayidx2, align 4, !tbaa !5
+  ret void
+
+; CHECK-NOT: mtvsrwa
+; CHECK-NOT: mtfprwa
+; CHECK: lxsiwax [[REG:[0-9]+]], {{.*}}
+; CHECK-NOT: mtvsrwa
+; CHECK-NOT: mtfprwa
+; CHECK: xscvsxdsp {{.*}}, [[REG]]
+; CHECK-NOT: mtvsrwa
+; CHECK-NOT: mtfprwa
+; CHECK: blr
+
+}
+
+; Function Attrs: norecurse nounwind readonly
+define float @test2(i32* nocapture readonly %b) #0 {
+
+; CHECK-LABEL: test2
+
+entry:
+  %0 = load i32, i32* %b, align 4, !tbaa !1
+  %conv = sitofp i32 %0 to float
+  %mul = fmul float %conv, 0x40030A3D80000000
+  ret float %mul
+
+; CHECK-NOT: mtvsrwa
+; CHECK-NOT: mtfprwa
+; CHECK: lxsiwax [[REG:[0-9]+]], {{.*}}
+; CHECK-NOT: mtvsrwa
+; CHECK-NOT: mtfprwa
+; CHECK: xscvsxdsp {{.*}}, [[REG]]
+; CHECK-NOT: mtvsrwa
+; CHECK-NOT: mtfprwa
+; CHECK: blr
+
+}
+
+; Function Attrs: norecurse nounwind
+define void @test3(float* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture %c, i32 signext %n) #0 {
+
+; CHECK-LABEL: test3
+
+entry:
+  %idxprom = sext i32 %n to i64
+  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4, !tbaa !1
+  %conv = sitofp i32 %0 to float
+  %mul = fmul float %conv, 0x4002916880000000
+  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %idxprom
+  store float %mul, float* %arrayidx2, align 4, !tbaa !5
+  %arrayidx6 = getelementptr inbounds i32, i32* %c, i64 %idxprom
+  %1 = load i32, i32* %arrayidx6, align 4, !tbaa !1
+  %add = add nsw i32 %1, %0
+  store i32 %add, i32* %arrayidx6, align 4, !tbaa !1
+  ret void
+
+; CHECK: mtvsrwa
+; CHECK: blr
+
+}
+
+!0 = !{!"clang version 3.9.0 (http://llvm.org/git/clang.git b88a395e7ba26c0fb96cd99a2a004d76f4f41d0c) (http://llvm.org/git/llvm.git 1ac3fbac0f5b037c17c0b0f9d271c32c4d7ca1b5)"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C++ TBAA"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"float", !3, i64 0}