[llvm] r225301 - [PowerPC] Reuse a load operand in int->fp conversions

Tue Jan 6 14:31:02 PST 2015

Author: hfinkel
Date: Tue Jan  6 16:31:02 2015
New Revision: 225301

URL: http://llvm.org/viewvc/llvm-project?rev=225301&view=rev
Log:
[PowerPC] Reuse a load operand in int->fp conversions

int->fp conversions on PPC must be done through memory loads and stores. On a
modern core, this process begins by storing the int value to memory, then
loading it using a (sometimes special) FP load instruction. Unfortunately, we
would do this even when the value to be converted was itself a load, in which
case we can just use that same memory location instead of first copying the
value to another one.
There is a slight complication when handling int_to_fp(fp_to_int(x)) pairs,
because the fp_to_int operand has not been lowered when the int_to_fp is being
lowered. We handle this specially by invoking fp_to_int's lowering logic
(partially) and getting the necessary memory location (some trivial refactoring
was done to make this possible).

This is all somewhat ugly, and it would be nice if some later CodeGen stage
could just clean this stuff up, but because doing so would involve modifying
target-specific nodes (or instructions), it is not immediately clear how that
would work.

Also, remove a related entry from the README.txt for which we now generate
reasonable code.

Added:
    llvm/trunk/test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll
Modified:
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
    llvm/trunk/lib/Target/PowerPC/README.txt

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=225301&r1=225300&r2=225301&view=diff
==============================================================================

--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Tue Jan  6 16:31:02 2015
@@ -5408,9 +5408,9 @@ SDValue PPCTargetLowering::LowerSELECT_C
   return Op;
 }
 
-// FIXME: Split this code up when LegalizeDAGTypes lands.
-SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
-                                           SDLoc dl) const {
+void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
+                                               SelectionDAG &DAG,
+                                               SDLoc dl) const {
   assert(Op.getOperand(0).getValueType().isFloatingPoint());
   SDValue Src = Op.getOperand(0);
   if (Src.getValueType() == MVT::f32)
@@ -5459,15 +5459,92 @@ SDValue PPCTargetLowering::LowerFP_TO_IN
   if (Op.getValueType() == MVT::i32 && !i32Stack) {
     FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                         DAG.getConstant(4, FIPtr.getValueType()));
-    MPI = MachinePointerInfo();
+    MPI = MPI.getWithOffset(4);
   }
 
-  return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI,
-                     false, false, false, 0);
+  RLI.Chain = Chain;
+  RLI.Ptr = FIPtr;
+  RLI.MPI = MPI;
+}
+
+SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+                                          SDLoc dl) const {
+  ReuseLoadInfo RLI;
+  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
+
+  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
+                     false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
+                     RLI.Ranges);
+}
+
+// We're trying to insert a regular store, S, and then a load, L. If the
+// incoming value, O, is a load, we might just be able to have our load use the
+// address used by O. However, we don't know if anything else will store to
+// that address before we can load from it. To prevent this situation, we need
+// to insert our load, L, into the chain as a peer of O. To do this, we give L
+// the same chain operand as O, we create a token factor from the chain results
+// of O and L, and we replace all uses of O's chain result with that token
+// factor (see spliceIntoChain below for this last part).
+bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
+                                            ReuseLoadInfo &RLI,
+                                            SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+  if ((Op.getOpcode() == ISD::FP_TO_UINT ||
+       Op.getOpcode() == ISD::FP_TO_SINT) &&
+      isOperationLegalOrCustom(Op.getOpcode(),
+                               Op.getOperand(0).getValueType())) {
+
+    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
+    return true;
+  }
+
+  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
+  if (!LD || !ISD::isNON_EXTLoad(LD) || LD->isVolatile() || LD->isNonTemporal())
+    return false;
+  if (LD->getMemoryVT() != MemVT)
+    return false;
+
+  RLI.Ptr = LD->getBasePtr();
+  if (LD->isIndexed() && LD->getOffset().getOpcode() != ISD::UNDEF) {
+    assert(LD->getAddressingMode() == ISD::PRE_INC &&
+           "Non-pre-inc AM on PPC?");
+    RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
+                          LD->getOffset());
+  }
+
+  RLI.Chain = LD->getChain();
+  RLI.MPI = LD->getPointerInfo();
+  RLI.IsInvariant = LD->isInvariant();
+  RLI.Alignment = LD->getAlignment();
+  RLI.AAInfo = LD->getAAInfo();
+  RLI.Ranges = LD->getRanges();
+
+  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
+  return true;
+}
+
+// Given the head of the old chain, ResChain, insert a token factor containing
+// it and NewResChain, and make users of ResChain now be users of that token
+// factor.
+void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
+                                        SDValue NewResChain,
+                                        SelectionDAG &DAG) const {
+  if (!ResChain)
+    return;
+
+  SDLoc dl(NewResChain);
+
+  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                           NewResChain, DAG.getUNDEF(MVT::Other));
+  assert(TF.getNode() != NewResChain.getNode() &&
+         "A new TF really is required here");
+
+  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
+  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
 }
 
 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
-                                           SelectionDAG &DAG) const {
+                                          SelectionDAG &DAG) const {
   SDLoc dl(Op);
   // Don't handle ppc_fp128 here; let it be lowered to a libcall.
   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
@@ -5539,7 +5616,17 @@ SDValue PPCTargetLowering::LowerINT_TO_F
       SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
     }
 
-    SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
+    ReuseLoadInfo RLI;
+    SDValue Bits;
+
+    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
+      Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
+                         false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
+                         RLI.Ranges);
+      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
+    } else
+      Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
+
     SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
 
     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
@@ -5560,23 +5647,36 @@ SDValue PPCTargetLowering::LowerINT_TO_F
 
   SDValue Ld;
   if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
-    int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
-    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
-
-    SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
-                                 MachinePointerInfo::getFixedStack(FrameIdx),
-                                 false, false, 0);
+    ReuseLoadInfo RLI;
+    bool ReusingLoad;
+    if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
+                                            DAG))) {
+      int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
+      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+      SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
+                                   MachinePointerInfo::getFixedStack(FrameIdx),
+                                   false, false, 0);
+
+      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
+             "Expected an i32 store");
+
+      RLI.Ptr = FIdx;
+      RLI.Chain = Store;
+      RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
+      RLI.Alignment = 4;
+    }
 
-    assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
-           "Expected an i32 store");
     MachineMemOperand *MMO =
-      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
-                              MachineMemOperand::MOLoad, 4, 4);
-    SDValue Ops[] = { Store, FIdx };
+      MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
+                              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
+    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
     Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
                                    PPCISD::LFIWZX : PPCISD::LFIWAX,
                                  dl, DAG.getVTList(MVT::f64, MVT::Other),
                                  Ops, MVT::i32, MMO);
+    if (ReusingLoad)
+      spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
   } else {
     assert(Subtarget.isPPC64() &&
            "i32->FP without LFIWAX supported only on PPC64");
@@ -6489,7 +6589,7 @@ SDValue PPCTargetLowering::LowerOperatio
   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
   case ISD::FP_TO_UINT:
   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
-                                                       SDLoc(Op));
+                                                      SDLoc(Op));
   case ISD::UINT_TO_FP:
   case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=225301&r1=225300&r2=225301&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Tue Jan  6 16:31:02 2015
@@ -591,6 +591,28 @@ namespace llvm {
     }
 
   private:
+
+    struct ReuseLoadInfo {
+      SDValue Ptr;
+      SDValue Chain;
+      SDValue ResChain;
+      MachinePointerInfo MPI;
+      bool IsInvariant;
+      unsigned Alignment;
+      AAMDNodes AAInfo;
+      const MDNode *Ranges;
+
+      ReuseLoadInfo() : IsInvariant(false), Alignment(0), Ranges(nullptr) {}
+    };
+
+    bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
+                             SelectionDAG &DAG) const;
+    void spliceIntoChain(SDValue ResChain, SDValue NewResChain,
+                         SelectionDAG &DAG) const;
+
+    void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
+                                SelectionDAG &DAG, SDLoc dl) const;
+
     SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
     SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
 

Modified: llvm/trunk/lib/Target/PowerPC/README.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/README.txt?rev=225301&r1=225300&r2=225301&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/README.txt (original)
+++ llvm/trunk/lib/Target/PowerPC/README.txt Tue Jan  6 16:31:02 2015
@@ -302,27 +302,6 @@ http://www.lcs.mit.edu/pubs/pdf/MIT-LCS-
 
 ===-------------------------------------------------------------------------===
 
-float foo(float X) { return (int)(X); }
-
-Currently produces:
-
-_foo:
-        fctiwz f0, f1
-        stfd f0, -8(r1)
-        lwz r2, -4(r1)
-        extsw r2, r2
-        std r2, -16(r1)
-        lfd f0, -16(r1)
-        fcfid f0, f0
-        frsp f1, f0
-        blr
-
-We could use a target dag combine to turn the lwz/extsw into an lwa when the 
-lwz has a single use.  Since LWA is cracked anyway, this would be a codesize
-win only.
-
-===-------------------------------------------------------------------------===
-
 We generate ugly code for this:
 
 void func(unsigned int *ret, float dx, float dy, float dz, float dw) {

Added: llvm/trunk/test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll?rev=225301&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll Tue Jan  6 16:31:02 2015
@@ -0,0 +1,96 @@
+; RUN: llc -mcpu=a2 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readonly
+define double @test1(i64* nocapture readonly %x) #0 {
+entry:
+  %0 = load i64* %x, align 8
+  %conv = sitofp i64 %0 to double
+  ret double %conv
+
+; CHECK-LABEL: @test1
+; CHECK: lfd [[REG1:[0-9]+]], 0(3)
+; CHECK: fcfid 1, [[REG1]]
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readonly
+define double @test2(i32* nocapture readonly %x) #0 {
+entry:
+  %0 = load i32* %x, align 4
+  %conv = sitofp i32 %0 to double
+  ret double %conv
+
+; CHECK-LABEL: @test2
+; CHECK: lfiwax [[REG1:[0-9]+]], 0, 3
+; CHECK: fcfid 1, [[REG1]]
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define float @foo(float %X) #0 {
+entry:
+  %conv = fptosi float %X to i32
+  %conv1 = sitofp i32 %conv to float
+  ret float %conv1
+
+; CHECK-LABEL: @foo
+; CHECK-DAG: fctiwz [[REG2:[0-9]+]], 1
+; CHECK-DAG: addi [[REG1:[0-9]+]], 1,
+; CHECK: stfiwx [[REG2]], 0, [[REG1]]
+; CHECK: lfiwax [[REG3:[0-9]+]], 0, [[REG1]]
+; CHECK: fcfids 1, [[REG3]]
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define double @food(double %X) #0 {
+entry:
+  %conv = fptosi double %X to i32
+  %conv1 = sitofp i32 %conv to double
+  ret double %conv1
+
+; CHECK-LABEL: @food
+; CHECK-DAG: fctiwz [[REG2:[0-9]+]], 1
+; CHECK-DAG: addi [[REG1:[0-9]+]], 1,
+; CHECK: stfiwx [[REG2]], 0, [[REG1]]
+; CHECK: lfiwax [[REG3:[0-9]+]], 0, [[REG1]]
+; CHECK: fcfid 1, [[REG3]]
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define float @foou(float %X) #0 {
+entry:
+  %conv = fptoui float %X to i32
+  %conv1 = uitofp i32 %conv to float
+  ret float %conv1
+
+; CHECK-LABEL: @foou
+; CHECK-DAG: fctiwuz [[REG2:[0-9]+]], 1
+; CHECK-DAG: addi [[REG1:[0-9]+]], 1,
+; CHECK: stfiwx [[REG2]], 0, [[REG1]]
+; CHECK: lfiwzx [[REG3:[0-9]+]], 0, [[REG1]]
+; CHECK: fcfidus 1, [[REG3]]
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define double @fooud(double %X) #0 {
+entry:
+  %conv = fptoui double %X to i32
+  %conv1 = uitofp i32 %conv to double
+  ret double %conv1
+
+; CHECK-LABEL: @fooud
+; CHECK-DAG: fctiwuz [[REG2:[0-9]+]], 1
+; CHECK-DAG: addi [[REG1:[0-9]+]], 1,
+; CHECK: stfiwx [[REG2]], 0, [[REG1]]
+; CHECK: lfiwzx [[REG3:[0-9]+]], 0, [[REG1]]
+; CHECK: fcfidu 1, [[REG3]]
+; CHECK: blr
+}
+
+attributes #0 = { nounwind readonly }
+