[llvm] r189097 - Turn MipsOptimizeMathLibCalls into a target-independent scalar transform

Richard Sandiford rsandifo at linux.vnet.ibm.com
Fri Aug 23 03:27:03 PDT 2013


Author: rsandifo
Date: Fri Aug 23 05:27:02 2013
New Revision: 189097

URL: http://llvm.org/viewvc/llvm-project?rev=189097&view=rev
Log:
Turn MipsOptimizeMathLibCalls into a target-independent scalar transform

...so that it can be used for SystemZ too.  Most of the code is the same.
The only real change is to use TargetTransformInfo to test when a sqrt
instruction is available.

The pass is opt-in because at the moment it only handles sqrt.

Added:
    llvm/trunk/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
Removed:
    llvm/trunk/lib/Target/Mips/MipsOptimizeMathLibCalls.cpp
Modified:
    llvm/trunk/include/llvm-c/Transforms/Scalar.h
    llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
    llvm/trunk/include/llvm/InitializePasses.h
    llvm/trunk/include/llvm/LinkAllPasses.h
    llvm/trunk/include/llvm/Transforms/Scalar.h
    llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
    llvm/trunk/lib/CodeGen/BasicTargetTransformInfo.cpp
    llvm/trunk/lib/Target/Mips/CMakeLists.txt
    llvm/trunk/lib/Target/Mips/Mips.h
    llvm/trunk/lib/Target/Mips/MipsTargetMachine.cpp
    llvm/trunk/lib/Target/SystemZ/SystemZTargetMachine.cpp
    llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt
    llvm/trunk/lib/Transforms/Scalar/Scalar.cpp
    llvm/trunk/test/CodeGen/SystemZ/fp-sqrt-01.ll
    llvm/trunk/test/CodeGen/SystemZ/fp-sqrt-02.ll

Modified: llvm/trunk/include/llvm-c/Transforms/Scalar.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm-c/Transforms/Scalar.h?rev=189097&r1=189096&r2=189097&view=diff
==============================================================================
--- llvm/trunk/include/llvm-c/Transforms/Scalar.h (original)
+++ llvm/trunk/include/llvm-c/Transforms/Scalar.h Fri Aug 23 05:27:02 2013
@@ -74,6 +74,9 @@ void LLVMAddLoopUnswitchPass(LLVMPassMan
 /** See llvm::createMemCpyOptPass function. */
 void LLVMAddMemCpyOptPass(LLVMPassManagerRef PM);
 
+/** See llvm::createPartiallyInlineLibCallsPass function. */
+void LLVMAddPartiallyInlineLibCallsPass(LLVMPassManagerRef PM);
+
 /** See llvm::createPromoteMemoryToRegisterPass function. */
 void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM);
 

Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h?rev=189097&r1=189096&r2=189097&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h Fri Aug 23 05:27:02 2013
@@ -262,6 +262,10 @@ public:
   /// getPopcntSupport - Return hardware support for population count.
   virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
 
+  /// haveFastSqrt -- Return true if the hardware has a fast square-root
+  /// instruction.
+  virtual bool haveFastSqrt(Type *Ty) const;
+
   /// getIntImmCost - Return the expected cost of materializing the given
   /// integer immediate of the specified type.
   virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;

Modified: llvm/trunk/include/llvm/InitializePasses.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/InitializePasses.h?rev=189097&r1=189096&r2=189097&view=diff
==============================================================================
--- llvm/trunk/include/llvm/InitializePasses.h (original)
+++ llvm/trunk/include/llvm/InitializePasses.h Fri Aug 23 05:27:02 2013
@@ -205,6 +205,7 @@ void initializeObjCARCContractPass(PassR
 void initializeObjCARCOptPass(PassRegistry&);
 void initializeOptimalEdgeProfilerPass(PassRegistry&);
 void initializeOptimizePHIsPass(PassRegistry&);
+void initializePartiallyInlineLibCallsPass(PassRegistry&);
 void initializePEIPass(PassRegistry&);
 void initializePHIEliminationPass(PassRegistry&);
 void initializePartialInlinerPass(PassRegistry&);

Modified: llvm/trunk/include/llvm/LinkAllPasses.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/LinkAllPasses.h?rev=189097&r1=189096&r2=189097&view=diff
==============================================================================
--- llvm/trunk/include/llvm/LinkAllPasses.h (original)
+++ llvm/trunk/include/llvm/LinkAllPasses.h Fri Aug 23 05:27:02 2013
@@ -163,6 +163,7 @@ namespace {
       (void) llvm::createLoopVectorizePass();
       (void) llvm::createSLPVectorizerPass();
       (void) llvm::createBBVectorizePass();
+      (void) llvm::createPartiallyInlineLibCallsPass();
 
       (void)new llvm::IntervalPartition();
       (void)new llvm::FindUsedTypes();

Modified: llvm/trunk/include/llvm/Transforms/Scalar.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Scalar.h?rev=189097&r1=189096&r2=189097&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Scalar.h (original)
+++ llvm/trunk/include/llvm/Transforms/Scalar.h Fri Aug 23 05:27:02 2013
@@ -354,6 +354,13 @@ extern char &InstructionSimplifierID;
 FunctionPass *createLowerExpectIntrinsicPass();
 
 
+//===----------------------------------------------------------------------===//
+//
+// PartiallyInlineLibCalls - Tries to inline the fast path of library
+// calls such as sqrt.
+//
+FunctionPass *createPartiallyInlineLibCallsPass();
+
 } // End llvm namespace
 
 #endif

Modified: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/TargetTransformInfo.cpp?rev=189097&r1=189096&r2=189097&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp Fri Aug 23 05:27:02 2013
@@ -145,6 +145,10 @@ TargetTransformInfo::getPopcntSupport(un
   return PrevTTI->getPopcntSupport(IntTyWidthInBit);
 }
 
+bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
+  return PrevTTI->haveFastSqrt(Ty);
+}
+
 unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
   return PrevTTI->getIntImmCost(Imm, Ty);
 }
@@ -505,6 +509,10 @@ struct NoTTI : ImmutablePass, TargetTran
     return PSK_Software;
   }
 
+  bool haveFastSqrt(Type *Ty) const {
+    return false;
+  }
+
   unsigned getIntImmCost(const APInt &Imm, Type *Ty) const {
     return 1;
   }

Modified: llvm/trunk/lib/CodeGen/BasicTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/BasicTargetTransformInfo.cpp?rev=189097&r1=189096&r2=189097&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/BasicTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/CodeGen/BasicTargetTransformInfo.cpp Fri Aug 23 05:27:02 2013
@@ -83,6 +83,7 @@ public:
   virtual unsigned getJumpBufAlignment() const;
   virtual unsigned getJumpBufSize() const;
   virtual bool shouldBuildLookupTables() const;
+  virtual bool haveFastSqrt(Type *Ty) const;
 
   /// @}
 
@@ -182,6 +183,12 @@ bool BasicTTI::shouldBuildLookupTables()
        TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
 }
 
+bool BasicTTI::haveFastSqrt(Type *Ty) const {
+  const TargetLoweringBase *TLI = getTLI();
+  EVT VT = TLI->getValueType(Ty);
+  return TLI->isTypeLegal(VT) && TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
+}
+
 //===----------------------------------------------------------------------===//
 //
 // Calls used by the vectorizers.

Modified: llvm/trunk/lib/Target/Mips/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/CMakeLists.txt?rev=189097&r1=189096&r2=189097&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Mips/CMakeLists.txt (original)
+++ llvm/trunk/lib/Target/Mips/CMakeLists.txt Fri Aug 23 05:27:02 2013
@@ -35,7 +35,6 @@ add_llvm_target(MipsCodeGen
   MipsMachineFunction.cpp
   MipsModuleISelDAGToDAG.cpp
   MipsOs16.cpp
-  MipsOptimizeMathLibCalls.cpp
   MipsRegisterInfo.cpp
   MipsSEFrameLowering.cpp
   MipsSEInstrInfo.cpp

Modified: llvm/trunk/lib/Target/Mips/Mips.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/Mips.h?rev=189097&r1=189096&r2=189097&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Mips/Mips.h (original)
+++ llvm/trunk/lib/Target/Mips/Mips.h Fri Aug 23 05:27:02 2013
@@ -28,7 +28,6 @@ namespace llvm {
   FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
                                              JITCodeEmitter &JCE);
   FunctionPass *createMipsConstantIslandPass(MipsTargetMachine &tm);
-  FunctionPass *createMipsOptimizeMathLibCalls(MipsTargetMachine &TM);
 } // end namespace llvm;
 
 #endif

Removed: llvm/trunk/lib/Target/Mips/MipsOptimizeMathLibCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsOptimizeMathLibCalls.cpp?rev=189096&view=auto
==============================================================================
--- llvm/trunk/lib/Target/Mips/MipsOptimizeMathLibCalls.cpp (original)
+++ llvm/trunk/lib/Target/Mips/MipsOptimizeMathLibCalls.cpp (removed)
@@ -1,175 +0,0 @@
-//===---- MipsOptimizeMathLibCalls.cpp - Optimize math lib calls.      ----===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass does an IR transformation which enables the backend to emit native
-// math instructions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MipsTargetMachine.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-
-using namespace llvm;
-
-static cl::opt<bool> DisableOpt("disable-mips-math-optimization",
-                                cl::init(false),
-                                cl::desc("MIPS: Disable math lib call "
-                                         "optimization."), cl::Hidden);
-
-namespace {
-  class MipsOptimizeMathLibCalls : public FunctionPass {
-  public:
-    static char ID;
-
-    MipsOptimizeMathLibCalls(MipsTargetMachine &TM_) :
-      FunctionPass(ID), TM(TM_) {}
-
-    virtual const char *getPassName() const {
-      return "MIPS: Optimize calls to math library functions.";
-    }
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-
-    virtual bool runOnFunction(Function &F);
-
-  private:
-    /// Optimize calls to sqrt.
-    bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
-                      BasicBlock &CurrBB,
-                      Function::iterator &BB);
-
-    const TargetMachine &TM;
-  };
-
-  char MipsOptimizeMathLibCalls::ID = 0;
-}
-
-FunctionPass *llvm::createMipsOptimizeMathLibCalls(MipsTargetMachine &TM) {
-  return new MipsOptimizeMathLibCalls(TM);
-}
-
-void MipsOptimizeMathLibCalls::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<TargetLibraryInfo>();
-  FunctionPass::getAnalysisUsage(AU);
-}
-
-bool MipsOptimizeMathLibCalls::runOnFunction(Function &F) {
-  if (DisableOpt)
-    return false;
-
-  const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
-
-  if (Subtarget.inMips16Mode())
-    return false;
-
-  bool Changed = false;
-  Function::iterator CurrBB;
-  const TargetLibraryInfo *LibInfo = &getAnalysis<TargetLibraryInfo>();
-
-  for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) {
-    CurrBB = BB++;
-
-    for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end();
-         II != IE; ++II) {
-      CallInst *Call = dyn_cast<CallInst>(&*II);
-      Function *CalledFunc;
-
-      if (!Call || !(CalledFunc = Call->getCalledFunction()))
-        continue;
-
-      LibFunc::Func LibFunc;
-      Attribute A = CalledFunc->getAttributes()
-        .getAttribute(AttributeSet::FunctionIndex, "use-soft-float");
-
-      // Skip if function has "use-soft-float" attribute.
-      if ((A.isStringAttribute() && (A.getValueAsString() == "true")) ||
-          TM.Options.UseSoftFloat)
-        continue;
-
-      // Skip if function either has local linkage or is not a known library
-      // function.
-      if (CalledFunc->hasLocalLinkage() || !CalledFunc->hasName() ||
-          !LibInfo->getLibFunc(CalledFunc->getName(), LibFunc))
-        continue;
-
-      switch (LibFunc) {
-      case LibFunc::sqrtf:
-      case LibFunc::sqrt:
-        if (optimizeSQRT(Call, CalledFunc, *CurrBB, BB))
-          break;
-        continue;
-      default:
-        continue;
-      }
-
-      Changed = true;
-      break;
-    }
-  }
-
-  return Changed;
-}
-
-bool MipsOptimizeMathLibCalls::optimizeSQRT(CallInst *Call,
-                                            Function *CalledFunc,
-                                            BasicBlock &CurrBB,
-                                            Function::iterator &BB) {
-  // There is no need to change the IR, since backend will emit sqrt
-  // instruction if the call has already been marked read-only.
-  if (Call->onlyReadsMemory())
-    return false;
-
-  // Do the following transformation:
-  //
-  // (before)
-  // dst = sqrt(src)
-  //
-  // (after)
-  // v0 = sqrt_noreadmem(src) # native sqrt instruction.
-  // if (v0 is a NaN)
-  //   v1 = sqrt(src)         # library call.
-  // dst = phi(v0, v1)
-  //
-
-  // Move all instructions following Call to newly created block JoinBB.
-  // Create phi and replace all uses.
-  BasicBlock *JoinBB = llvm::SplitBlock(&CurrBB, Call->getNextNode(), this);
-  IRBuilder<> Builder(JoinBB, JoinBB->begin());
-  PHINode *Phi = Builder.CreatePHI(Call->getType(), 2);
-  Call->replaceAllUsesWith(Phi);
-
-  // Create basic block LibCallBB and insert a call to library function sqrt.
-  BasicBlock *LibCallBB = BasicBlock::Create(CurrBB.getContext(), "call.sqrt",
-                                             CurrBB.getParent(), JoinBB);
-  Builder.SetInsertPoint(LibCallBB);
-  Instruction *LibCall = Call->clone();
-  Builder.Insert(LibCall);
-  Builder.CreateBr(JoinBB);
-
-  // Add attribute "readnone" so that backend can use a native sqrt instruction
-  // for this call. Insert a FP compare instruction and a conditional branch
-  // at the end of CurrBB.
-  Call->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
-  CurrBB.getTerminator()->eraseFromParent();
-  Builder.SetInsertPoint(&CurrBB);
-  Value *FCmp = Builder.CreateFCmpOEQ(Call, Call);
-  Builder.CreateCondBr(FCmp, JoinBB, LibCallBB);
-
-  // Add phi operands.
-  Phi->addIncoming(Call, &CurrBB);
-  Phi->addIncoming(LibCall, LibCallBB);
-
-  BB = JoinBB;
-  return true;
-}

Modified: llvm/trunk/lib/Target/Mips/MipsTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsTargetMachine.cpp?rev=189097&r1=189096&r2=189097&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Mips/MipsTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/Mips/MipsTargetMachine.cpp Fri Aug 23 05:27:02 2013
@@ -32,6 +32,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Transforms/Scalar.h"
 using namespace llvm;
 
 
@@ -160,7 +161,7 @@ void MipsPassConfig::addIRPasses() {
     addPass(createMipsOs16(getMipsTargetMachine()));
   if (getMipsSubtarget().inMips16HardFloat())
     addPass(createMips16HardFloat(getMipsTargetMachine()));
-  addPass(createMipsOptimizeMathLibCalls(getMipsTargetMachine()));
+  addPass(createPartiallyInlineLibCallsPass());
 }
 // Install an instruction selector pass using
 // the ISelDag to gen Mips code.

Modified: llvm/trunk/lib/Target/SystemZ/SystemZTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZTargetMachine.cpp?rev=189097&r1=189096&r2=189097&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZTargetMachine.cpp Fri Aug 23 05:27:02 2013
@@ -10,6 +10,7 @@
 #include "SystemZTargetMachine.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Transforms/Scalar.h"
 
 using namespace llvm;
 
@@ -47,12 +48,18 @@ public:
     return getTM<SystemZTargetMachine>();
   }
 
+  virtual void addIRPasses() LLVM_OVERRIDE;
   virtual bool addInstSelector() LLVM_OVERRIDE;
   virtual bool addPreSched2() LLVM_OVERRIDE;
   virtual bool addPreEmitPass() LLVM_OVERRIDE;
 };
 } // end anonymous namespace
 
+void SystemZPassConfig::addIRPasses() {
+  TargetPassConfig::addIRPasses();
+  addPass(createPartiallyInlineLibCallsPass());
+}
+
 bool SystemZPassConfig::addInstSelector() {
   addPass(createSystemZISelDag(getSystemZTargetMachine(), getOptLevel()));
   return false;

Modified: llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt?rev=189097&r1=189096&r2=189097&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt (original)
+++ llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt Fri Aug 23 05:27:02 2013
@@ -21,6 +21,7 @@ add_llvm_library(LLVMScalarOpts
   LoopUnswitch.cpp
   LowerAtomic.cpp
   MemCpyOptimizer.cpp
+  PartiallyInlineLibCalls.cpp
   Reassociate.cpp
   Reg2Mem.cpp
   SCCP.cpp

Added: llvm/trunk/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp?rev=189097&view=auto
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp (added)
+++ llvm/trunk/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp Fri Aug 23 05:27:02 2013
@@ -0,0 +1,156 @@
+//===--- PartiallyInlineLibCalls.cpp - Partially inline libcalls ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass tries to partially inline the fast path of well-known library
+// functions, such as using square-root instructions for cases where sqrt()
+// does not need to set errno.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "partially-inline-libcalls"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+namespace {
+  class PartiallyInlineLibCalls : public FunctionPass {
+  public:
+    static char ID;
+
+    PartiallyInlineLibCalls() :
+      FunctionPass(ID) {
+      initializePartiallyInlineLibCallsPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    virtual bool runOnFunction(Function &F);
+
+  private:
+    /// Optimize calls to sqrt.
+    bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
+                      BasicBlock &CurrBB, Function::iterator &BB);
+  };
+
+  char PartiallyInlineLibCalls::ID = 0;
+}
+
+INITIALIZE_PASS(PartiallyInlineLibCalls, "partially-inline-libcalls",
+                "Partially inline calls to library functions", false, false)
+
+void PartiallyInlineLibCalls::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<TargetLibraryInfo>();
+  AU.addRequired<TargetTransformInfo>();
+  FunctionPass::getAnalysisUsage(AU);
+}
+
+bool PartiallyInlineLibCalls::runOnFunction(Function &F) {
+  bool Changed = false;
+  Function::iterator CurrBB;
+  TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+  const TargetTransformInfo *TTI = &getAnalysis<TargetTransformInfo>();
+  for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) {
+    CurrBB = BB++;
+
+    for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end();
+         II != IE; ++II) {
+      CallInst *Call = dyn_cast<CallInst>(&*II);
+      Function *CalledFunc;
+
+      if (!Call || !(CalledFunc = Call->getCalledFunction()))
+        continue;
+
+      // Skip if function either has local linkage or is not a known library
+      // function.
+      LibFunc::Func LibFunc;
+      if (CalledFunc->hasLocalLinkage() || !CalledFunc->hasName() ||
+          !TLI->getLibFunc(CalledFunc->getName(), LibFunc))
+        continue;
+
+      switch (LibFunc) {
+      case LibFunc::sqrtf:
+      case LibFunc::sqrt:
+        if (TTI->haveFastSqrt(Call->getType()) &&
+            optimizeSQRT(Call, CalledFunc, *CurrBB, BB))
+          break;
+        continue;
+      default:
+        continue;
+      }
+
+      Changed = true;
+      break;
+    }
+  }
+
+  return Changed;
+}
+
+bool PartiallyInlineLibCalls::optimizeSQRT(CallInst *Call,
+                                           Function *CalledFunc,
+                                           BasicBlock &CurrBB,
+                                           Function::iterator &BB) {
+  // There is no need to change the IR, since backend will emit sqrt
+  // instruction if the call has already been marked read-only.
+  if (Call->onlyReadsMemory())
+    return false;
+
+  // Do the following transformation:
+  //
+  // (before)
+  // dst = sqrt(src)
+  //
+  // (after)
+  // v0 = sqrt_noreadmem(src) # native sqrt instruction.
+  // if (v0 is a NaN)
+  //   v1 = sqrt(src)         # library call.
+  // dst = phi(v0, v1)
+  //
+
+  // Move all instructions following Call to newly created block JoinBB.
+  // Create phi and replace all uses.
+  BasicBlock *JoinBB = llvm::SplitBlock(&CurrBB, Call->getNextNode(), this);
+  IRBuilder<> Builder(JoinBB, JoinBB->begin());
+  PHINode *Phi = Builder.CreatePHI(Call->getType(), 2);
+  Call->replaceAllUsesWith(Phi);
+
+  // Create basic block LibCallBB and insert a call to library function sqrt.
+  BasicBlock *LibCallBB = BasicBlock::Create(CurrBB.getContext(), "call.sqrt",
+                                             CurrBB.getParent(), JoinBB);
+  Builder.SetInsertPoint(LibCallBB);
+  Instruction *LibCall = Call->clone();
+  Builder.Insert(LibCall);
+  Builder.CreateBr(JoinBB);
+
+  // Add attribute "readnone" so that backend can use a native sqrt instruction
+  // for this call. Insert a FP compare instruction and a conditional branch
+  // at the end of CurrBB.
+  Call->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
+  CurrBB.getTerminator()->eraseFromParent();
+  Builder.SetInsertPoint(&CurrBB);
+  Value *FCmp = Builder.CreateFCmpOEQ(Call, Call);
+  Builder.CreateCondBr(FCmp, JoinBB, LibCallBB);
+
+  // Add phi operands.
+  Phi->addIncoming(Call, &CurrBB);
+  Phi->addIncoming(LibCall, LibCallBB);
+
+  BB = JoinBB;
+  return true;
+}
+
+FunctionPass *llvm::createPartiallyInlineLibCallsPass() {
+  return new PartiallyInlineLibCalls();
+}

Modified: llvm/trunk/lib/Transforms/Scalar/Scalar.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/Scalar.cpp?rev=189097&r1=189096&r2=189097&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/Scalar.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/Scalar.cpp Fri Aug 23 05:27:02 2013
@@ -50,6 +50,7 @@ void llvm::initializeScalarOpts(PassRegi
   initializeLowerAtomicPass(Registry);
   initializeLowerExpectIntrinsicPass(Registry);
   initializeMemCpyOptPass(Registry);
+  initializePartiallyInlineLibCallsPass(Registry);
   initializeReassociatePass(Registry);
   initializeRegToMemPass(Registry);
   initializeSCCPPass(Registry);
@@ -123,6 +124,10 @@ void LLVMAddMemCpyOptPass(LLVMPassManage
   unwrap(PM)->add(createMemCpyOptPass());
 }
 
+void LLVMAddPartiallyInlineLibCallsPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createPartiallyInlineLibCallsPass());
+}
+
 void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createPromoteMemoryToRegisterPass());
 }

Modified: llvm/trunk/test/CodeGen/SystemZ/fp-sqrt-01.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/fp-sqrt-01.ll?rev=189097&r1=189096&r2=189097&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/fp-sqrt-01.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/fp-sqrt-01.ll Fri Aug 23 05:27:02 2013
@@ -2,7 +2,8 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
-declare float @llvm.sqrt.f32(float %f)
+declare float @llvm.sqrt.f32(float)
+declare float @sqrtf(float)
 
 ; Check register square root.
 define float @f1(float %val) {
@@ -152,3 +153,17 @@ define void @f7(float *%ptr) {
 
   ret void
 }
+
+; Check that a call to the normal sqrtf function is lowered.
+define float @f8(float %dummy, float %val) {
+; CHECK-LABEL: f8:
+; CHECK: sqebr %f0, %f2
+; CHECK: cebr %f0, %f0
+; CHECK: jo [[LABEL:\.L.*]]
+; CHECK: br %r14
+; CHECK: [[LABEL]]:
+; CHECK: ler %f0, %f2
+; CHECK: jg sqrtf@PLT
+  %res = tail call float @sqrtf(float %val)
+  ret float %res
+}

Modified: llvm/trunk/test/CodeGen/SystemZ/fp-sqrt-02.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/fp-sqrt-02.ll?rev=189097&r1=189096&r2=189097&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/fp-sqrt-02.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/fp-sqrt-02.ll Fri Aug 23 05:27:02 2013
@@ -3,6 +3,7 @@
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
 declare double @llvm.sqrt.f64(double %f)
+declare double @sqrt(double)
 
 ; Check register square root.
 define double @f1(double %val) {
@@ -152,3 +153,17 @@ define void @f7(double *%ptr) {
 
   ret void
 }
+
+; Check that a call to the normal sqrt function is lowered.
+define double @f8(double %dummy, double %val) {
+; CHECK-LABEL: f8:
+; CHECK: sqdbr %f0, %f2
+; CHECK: cdbr %f0, %f0
+; CHECK: jo [[LABEL:\.L.*]]
+; CHECK: br %r14
+; CHECK: [[LABEL]]:
+; CHECK: ldr %f0, %f2
+; CHECK: jg sqrt@PLT
+  %res = tail call double @sqrt(double %val)
+  ret double %res
+}





More information about the llvm-commits mailing list