[llvm] 2303e93 - [Codegen][ReplaceWithVecLib] add pass to replace vector intrinsics with calls to vector library

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 5 11:25:30 PST 2021


Author: Lukas Sommer
Date: 2021-02-05T14:25:19-05:00
New Revision: 2303e93e666e13ebf6d24323729c28f520ecca37

URL: https://github.com/llvm/llvm-project/commit/2303e93e666e13ebf6d24323729c28f520ecca37
DIFF: https://github.com/llvm/llvm-project/commit/2303e93e666e13ebf6d24323729c28f520ecca37.diff

LOG: [Codegen][ReplaceWithVecLib] add pass to replace vector intrinsics with calls to vector library

This patch adds a pass to replace calls to vector intrinsics
(i.e., LLVM intrinsics operating on vector operands) with
calls to a vector library.

Currently, calls to LLVM intrinsics are only replaced with
calls to vector libraries when scalar calls to intrinsics are
vectorized by the Loop- or SLP-Vectorizer.

With this pass, it is now possible to replace calls to LLVM
intrinsics that already operate on vector operands, e.g., if
such code was generated by MLIR. The replacement uses
information from the TargetLibraryInfo, e.g., as specified
via -vector-library.

Differential Revision: https://reviews.llvm.org/D95373
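
As a rough sketch of how the new pass could be driven outside of the
codegen pipelines (the driver function and the choice of SVML below
are assumptions for illustration; ReplaceWithVeclib and the
TargetLibraryInfo setup are the pieces this patch and -vector-library
already provide):

  // Sketch only: run ReplaceWithVeclib over a module with a TLI preset
  // for SVML, i.e. what -vector-library=SVML selects.
  #include "llvm/ADT/Triple.h"
  #include "llvm/Analysis/TargetLibraryInfo.h"
  #include "llvm/CodeGen/ReplaceWithVeclib.h"
  #include "llvm/IR/Module.h"
  #include "llvm/Passes/PassBuilder.h"

  static void replaceVectorIntrinsics(llvm::Module &M) {
    using namespace llvm;
    // Populate the TLI mappings for SVML, vector intrinsics included.
    TargetLibraryInfoImpl TLII(Triple(M.getTargetTriple()));
    TLII.addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SVML);

    // Standard new-PM analysis setup; register the preset TLI first so
    // PassBuilder keeps it instead of installing the default one.
    LoopAnalysisManager LAM;
    FunctionAnalysisManager FAM;
    CGSCCAnalysisManager CGAM;
    ModuleAnalysisManager MAM;
    FAM.registerPass([&] { return TargetLibraryAnalysis(TLII); });
    PassBuilder PB;
    PB.registerModuleAnalyses(MAM);
    PB.registerCGSCCAnalyses(CGAM);
    PB.registerFunctionAnalyses(FAM);
    PB.registerLoopAnalyses(LAM);
    PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

    // Run the pass added by this patch on every function definition.
    FunctionPassManager FPM;
    FPM.addPass(ReplaceWithVeclib());
    for (Function &F : M)
      if (!F.isDeclaration())
        FPM.run(F, FAM);
  }

In the codegen pipelines themselves, the pass is scheduled right after
Constant Hoisting whenever the codegen optimization level is not
CodeGenOpt::None, as the hunks below show.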

Added: 
    llvm/include/llvm/CodeGen/ReplaceWithVeclib.h
    llvm/lib/CodeGen/ReplaceWithVeclib.cpp
    llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll

Modified: 
    llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
    llvm/include/llvm/CodeGen/MachinePassRegistry.def
    llvm/include/llvm/CodeGen/Passes.h
    llvm/include/llvm/InitializePasses.h
    llvm/lib/CodeGen/CMakeLists.txt
    llvm/lib/CodeGen/TargetPassConfig.cpp
    llvm/test/CodeGen/AArch64/O3-pipeline.ll
    llvm/test/CodeGen/ARM/O3-pipeline.ll
    llvm/test/CodeGen/X86/opt-pipeline.ll
    llvm/tools/llc/llc.cpp
    llvm/tools/opt/opt.cpp
    llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
index 1a94d28e3807..412300b180b7 100644
--- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
@@ -29,6 +29,7 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/CodeGen/PreISelIntrinsicLowering.h"
+#include "llvm/CodeGen/ReplaceWithVeclib.h"
 #include "llvm/CodeGen/UnreachableBlockElim.h"
 #include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/PassManager.h"
@@ -650,6 +651,12 @@ void CodeGenPassBuilder<Derived>::addIRPasses(AddIRPass &addPass) const {
   if (getOptLevel() != CodeGenOpt::None && !Opt.DisableConstantHoisting)
     addPass(ConstantHoistingPass());
 
+  if (getOptLevel() != CodeGenOpt::None) {
+    // Replace calls to LLVM intrinsics (e.g., exp, log) operating on vector
+    // operands with calls to the corresponding functions in a vector library.
+    addPass(ReplaceWithVeclib());
+  }
+
   if (getOptLevel() != CodeGenOpt::None && !Opt.DisablePartialLibcallInlining)
     addPass(PartiallyInlineLibCallsPass());
 

diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
index e9eaa5f77000..9fd07018a9a3 100644
--- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def
+++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
@@ -39,6 +39,7 @@ FUNCTION_PASS("mergeicmps", MergeICmpsPass, ())
 FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass, ())
 FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass, ())
 FUNCTION_PASS("consthoist", ConstantHoistingPass, ())
+FUNCTION_PASS("replace-with-veclib", ReplaceWithVeclib, ())
 FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass, ())
 FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false))
 FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true))

diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index 676ed2c65eb1..15dd7d1300fa 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -448,6 +448,10 @@ namespace llvm {
   /// shuffles.
   FunctionPass *createExpandReductionsPass();
 
+  // This pass replaces intrinsics operating on vector operands with calls to
+  // the corresponding function in a vector library (e.g., SVML, libmvec).
+  FunctionPass *createReplaceWithVeclibLegacyPass();
+
   // This pass expands memcmp() to load/stores.
   FunctionPass *createExpandMemCmpPass();
 

diff --git a/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h b/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h
new file mode 100644
index 000000000000..643ffca4ebd9
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/ReplaceWithVeclib.h
@@ -0,0 +1,38 @@
+//===- ReplaceWithVeclib.h - Replace vector intrinsics with veclib calls --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Replaces calls to LLVM vector intrinsics (i.e., calls to LLVM intrinsics
+// with vector operands) with matching calls to functions from a vector
+// library (e.g., libmvec, SVML) according to TargetLibraryInfo.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_UTILS_REPLACEWITHVECLIB_H
+#define LLVM_TRANSFORMS_UTILS_REPLACEWITHVECLIB_H
+
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+
+namespace llvm {
+class ReplaceWithVeclib : public PassInfoMixin<ReplaceWithVeclib> {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+// Legacy pass
+class ReplaceWithVeclibLegacy : public FunctionPass {
+public:
+  static char ID;
+  ReplaceWithVeclibLegacy() : FunctionPass(ID) {
+    initializeReplaceWithVeclibLegacyPass(*PassRegistry::getPassRegistry());
+  }
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  bool runOnFunction(Function &F) override;
+};
+
+} // End namespace llvm
+#endif // LLVM_TRANSFORMS_UTILS_REPLACEWITHVECLIB_H
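
The header above declares both the new-PM pass and the legacy wrapper.
A minimal sketch of driving the legacy pass through
createReplaceWithVeclibLegacyPass() could look as follows (the driver
and the LIBMVEC-X86 choice are assumptions; the wrapper-pass plumbing
is ordinary legacy-PM usage):

  #include "llvm/ADT/Triple.h"
  #include "llvm/Analysis/TargetLibraryInfo.h"
  #include "llvm/CodeGen/Passes.h"
  #include "llvm/IR/LegacyPassManager.h"
  #include "llvm/IR/Module.h"

  static void runReplaceWithVeclibLegacy(llvm::Module &M) {
    using namespace llvm;
    TargetLibraryInfoImpl TLII(Triple(M.getTargetTriple()));
    // Equivalent of -vector-library=LIBMVEC-X86 for this TLI instance.
    TLII.addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::LIBMVEC_X86);

    legacy::PassManager PM;
    // The wrapper pass hands the preset TLI to ReplaceWithVeclibLegacy.
    PM.add(new TargetLibraryInfoWrapperPass(TLII));
    PM.add(createReplaceWithVeclibLegacyPass());
    PM.run(M);
  }

In tree, TargetPassConfig::addIRPasses() takes care of this scheduling,
so a hand-rolled pass manager like this is only needed for standalone
experiments.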

diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 4f89179a03de..c2daddcf69f9 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -380,6 +380,7 @@ void initializeRegionPrinterPass(PassRegistry&);
 void initializeRegionViewerPass(PassRegistry&);
 void initializeRegisterCoalescerPass(PassRegistry&);
 void initializeRenameIndependentSubregsPass(PassRegistry&);
+void initializeReplaceWithVeclibLegacyPass(PassRegistry &);
 void initializeResetMachineFunctionPass(PassRegistry&);
 void initializeReversePostOrderFunctionAttrsLegacyPassPass(PassRegistry&);
 void initializeRewriteStatepointsForGCLegacyPassPass(PassRegistry &);

diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index 32a7946af63b..93b1fe78873f 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -147,6 +147,7 @@ add_llvm_component_library(LLVMCodeGen
   RegisterUsageInfo.cpp
   RegUsageInfoCollector.cpp
   RegUsageInfoPropagate.cpp
+  ReplaceWithVeclib.cpp
   ResetMachineFunctionPass.cpp
   SafeStack.cpp
   SafeStackLayout.cpp

diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
new file mode 100644
index 000000000000..2f2d8b54706b
--- /dev/null
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -0,0 +1,256 @@
+//=== ReplaceWithVeclib.cpp - Replace vector intrinsics with veclib calls ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Replaces calls to LLVM vector intrinsics (i.e., calls to LLVM intrinsics
+// with vector operands) with matching calls to functions from a vector
+// library (e.g., libmvec, SVML) according to TargetLibraryInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ReplaceWithVeclib.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DemandedBits.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "replace-with-veclib"
+
+STATISTIC(NumCallsReplaced,
+          "Number of calls to intrinsics that have been replaced.");
+
+STATISTIC(NumTLIFuncDeclAdded,
+          "Number of vector library function declarations added.");
+
+STATISTIC(NumFuncUsedAdded,
+          "Number of functions added to `llvm.compiler.used`");
+
+static bool replaceWithTLIFunction(CallInst &CI, const StringRef TLIName) {
+  Module *M = CI.getModule();
+
+  Function *OldFunc = CI.getCalledFunction();
+
+  // Check if the vector library function is already declared in this module,
+  // otherwise insert it.
+  Function *TLIFunc = M->getFunction(TLIName);
+  if (!TLIFunc) {
+    TLIFunc = Function::Create(OldFunc->getFunctionType(),
+                               Function::ExternalLinkage, TLIName, *M);
+    TLIFunc->copyAttributesFrom(OldFunc);
+
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added vector library function `"
+                      << TLIName << "` of type `" << *(TLIFunc->getType())
+                      << "` to module.\n");
+
+    ++NumTLIFuncDeclAdded;
+
+    // Add the freshly created function to llvm.compiler.used,
+    // similar to how it is done in InjectTLIMappings.
+    appendToCompilerUsed(*M, {TLIFunc});
+
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << TLIName
+                      << "` to `@llvm.compiler.used`.\n");
+    ++NumFuncUsedAdded;
+  }
+
+  // Replace the call to the vector intrinsic with a call
+  // to the corresponding function from the vector library.
+  IRBuilder<> IRBuilder{&CI};
+  SmallVector<Value *> Args(CI.arg_operands());
+  // Preserve the operand bundles.
+  SmallVector<OperandBundleDef, 1> OpBundles;
+  CI.getOperandBundlesAsDefs(OpBundles);
+  CallInst *Replacement = IRBuilder.CreateCall(TLIFunc, Args, OpBundles);
+  assert(OldFunc->getFunctionType() == TLIFunc->getFunctionType() &&
+         "Expecting function types to be identical");
+  CI.replaceAllUsesWith(Replacement);
+  if (isa<FPMathOperator>(Replacement)) {
+    // Preserve fast math flags for FP math.
+    Replacement->copyFastMathFlags(&CI);
+  }
+
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `"
+                    << OldFunc->getName() << "` with call to `" << TLIName
+                    << "`.\n");
+  ++NumCallsReplaced;
+  return true;
+}
+
+static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
+                                    CallInst &CI) {
+  if (!CI.getCalledFunction()) {
+    return false;
+  }
+
+  auto IntrinsicID = CI.getCalledFunction()->getIntrinsicID();
+  if (IntrinsicID == Intrinsic::not_intrinsic) {
+    // Replacement is only performed for intrinsic functions
+    return false;
+  }
+
+  // Convert vector arguments to scalar type and check that
+  // all vector operands have identical vector width.
+  unsigned VF = 0;
+  SmallVector<Type *> ScalarTypes;
+  for (auto Arg : enumerate(CI.arg_operands())) {
+    auto *ArgType = Arg.value()->getType();
+    // Vector calls to intrinsics can still have
+    // scalar operands for specific arguments.
+    if (hasVectorInstrinsicScalarOpd(IntrinsicID, Arg.index())) {
+      ScalarTypes.push_back(ArgType);
+    } else {
+      // The argument in this place should be a vector if
+      // this is a call to a vector intrinsic.
+      auto *VectorArgTy = dyn_cast<VectorType>(ArgType);
+      if (!VectorArgTy) {
+        // The argument is not a vector, do not perform
+        // the replacement.
+        return false;
+      }
+      auto NumElements = VectorArgTy->getElementCount();
+      if (NumElements.isScalable()) {
+        // The current implementation does not support
+        // scalable vectors.
+        return false;
+      }
+      if (VF && VF != NumElements.getFixedValue()) {
+        // The different arguments differ in vector size.
+        return false;
+      } else {
+        VF = NumElements.getFixedValue();
+      }
+      ScalarTypes.push_back(VectorArgTy->getElementType());
+    }
+  }
+
+  // Try to reconstruct the name for the scalar version of this
+  // intrinsic using the intrinsic ID and the argument types
+  // converted to scalar above.
+  std::string ScalarName;
+  if (Intrinsic::isOverloaded(IntrinsicID)) {
+    ScalarName = Intrinsic::getName(IntrinsicID, ScalarTypes);
+  } else {
+    ScalarName = Intrinsic::getName(IntrinsicID).str();
+  }
+
+  if (!TLI.isFunctionVectorizable(ScalarName)) {
+    // The TargetLibraryInfo does not contain a vectorized version of
+    // the scalar function.
+    return false;
+  }
+
+  // Try to find the mapping for the scalar version of this intrinsic
+  // and the exact vector width of the call operands in the
+  // TargetLibraryInfo.
+  const std::string TLIName =
+      std::string(TLI.getVectorizedFunction(ScalarName, VF));
+
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `"
+                    << ScalarName << "` and vector width " << VF << ".\n");
+
+  if (!TLIName.empty()) {
+    // Found the correct mapping in the TargetLibraryInfo,
+    // replace the call to the intrinsic with a call to
+    // the vector library function.
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI function `" << TLIName
+                      << "`.\n");
+    return replaceWithTLIFunction(CI, TLIName);
+  }
+
+  return false;
+}
+
+static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
+  bool Changed = false;
+  SmallVector<CallInst *> ReplacedCalls;
+  for (auto &I : instructions(F)) {
+    if (auto *CI = dyn_cast<CallInst>(&I)) {
+      if (replaceWithCallToVeclib(TLI, *CI)) {
+        ReplacedCalls.push_back(CI);
+        Changed = true;
+      }
+    }
+  }
+  // Erase the calls to the intrinsics that have been replaced
+  // with calls to the vector library.
+  for (auto *CI : ReplacedCalls) {
+    CI->eraseFromParent();
+  }
+  return Changed;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// New pass manager implementation.
+////////////////////////////////////////////////////////////////////////////////
+PreservedAnalyses ReplaceWithVeclib::run(Function &F,
+                                         FunctionAnalysisManager &AM) {
+  const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+  auto Changed = runImpl(TLI, F);
+  if (Changed) {
+    PreservedAnalyses PA;
+    PA.preserveSet<CFGAnalyses>();
+    PA.preserve<TargetLibraryAnalysis>();
+    PA.preserve<ScalarEvolutionAnalysis>();
+    PA.preserve<AAManager>();
+    PA.preserve<LoopAccessAnalysis>();
+    PA.preserve<DemandedBitsAnalysis>();
+    PA.preserve<OptimizationRemarkEmitterAnalysis>();
+    PA.preserve<GlobalsAA>();
+    return PA;
+  } else {
+    // The pass did not replace any calls, hence it preserves all analyses.
+    return PreservedAnalyses::all();
+  }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Legacy PM Implementation.
+////////////////////////////////////////////////////////////////////////////////
+bool ReplaceWithVeclibLegacy::runOnFunction(Function &F) {
+  const TargetLibraryInfo &TLI =
+      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+  return runImpl(TLI, F);
+}
+
+void ReplaceWithVeclibLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  AU.addRequired<TargetLibraryInfoWrapperPass>();
+  AU.addPreserved<TargetLibraryInfoWrapperPass>();
+  AU.addPreserved<ScalarEvolutionWrapperPass>();
+  AU.addPreserved<AAResultsWrapperPass>();
+  AU.addPreserved<LoopAccessLegacyAnalysis>();
+  AU.addPreserved<DemandedBitsWrapperPass>();
+  AU.addPreserved<OptimizationRemarkEmitterWrapperPass>();
+  AU.addPreserved<GlobalsAAWrapperPass>();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Legacy Pass manager initialization
+////////////////////////////////////////////////////////////////////////////////
+char ReplaceWithVeclibLegacy::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ReplaceWithVeclibLegacy, DEBUG_TYPE,
+                      "Replace intrinsics with calls to vector library", false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ReplaceWithVeclibLegacy, DEBUG_TYPE,
+                    "Replace intrinsics with calls to vector library", false,
+                    false)
+
+FunctionPass *llvm::createReplaceWithVeclibLegacyPass() {
+  return new ReplaceWithVeclibLegacy();
+}

diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index e844d03854e2..3e5a66611ada 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -858,6 +858,9 @@ void TargetPassConfig::addIRPasses() {
   if (getOptLevel() != CodeGenOpt::None && !DisableConstantHoisting)
     addPass(createConstantHoistingPass());
 
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(createReplaceWithVeclibLegacyPass());
+
   if (getOptLevel() != CodeGenOpt::None && !DisablePartialLibcallInlining)
     addPass(createPartiallyInlineLibCallsPass());
 

diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index 47ad73debf4a..aa0f37a1f7cf 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -54,6 +54,7 @@
 ; CHECK-NEXT:       Branch Probability Analysis
 ; CHECK-NEXT:       Block Frequency Analysis
 ; CHECK-NEXT:       Constant Hoisting
+; CHECK-NEXT:       Replace intrinsics with calls to vector library
 ; CHECK-NEXT:       Partially inline calls to library functions
 ; CHECK-NEXT:       Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; CHECK-NEXT:       Scalarize Masked Memory Intrinsics

diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
index 18471ca8e403..21f09fd097e4 100644
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -34,6 +34,7 @@
 ; CHECK-NEXT:      Branch Probability Analysis
 ; CHECK-NEXT:      Block Frequency Analysis
 ; CHECK-NEXT:      Constant Hoisting
+; CHECK-NEXT:      Replace intrinsics with calls to vector library
 ; CHECK-NEXT:      Partially inline calls to library functions
 ; CHECK-NEXT:      Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; CHECK-NEXT:      Scalarize Masked Memory Intrinsics

diff --git a/llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll b/llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll
new file mode 100644
index 000000000000..d0b31f322f44
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll
@@ -0,0 +1,105 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes
+; RUN: opt -vector-library=SVML -replace-with-veclib -S < %s | FileCheck %s  --check-prefixes=COMMON,SVML
+; RUN: opt -vector-library=LIBMVEC-X86 -replace-with-veclib -S < %s | FileCheck %s  --check-prefixes=COMMON,LIBMVEC-X86
+; RUN: opt -vector-library=MASSV -replace-with-veclib -S < %s | FileCheck %s  --check-prefixes=COMMON,MASSV
+; RUN: opt -vector-library=Accelerate -replace-with-veclib -S < %s | FileCheck %s  --check-prefixes=COMMON,ACCELERATE
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define <4 x double> @exp_v4(<4 x double> %in) {
+; SVML-LABEL: define {{[^@]+}}@exp_v4
+; SVML-SAME: (<4 x double> [[IN:%.*]]) {
+; SVML-NEXT:    [[TMP1:%.*]] = call <4 x double> @__svml_exp4(<4 x double> [[IN]])
+; SVML-NEXT:    ret <4 x double> [[TMP1]]
+;
+; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_v4
+; LIBMVEC-X86-SAME: (<4 x double> [[IN:%.*]]) {
+; LIBMVEC-X86-NEXT:    [[TMP1:%.*]] = call <4 x double> @_ZGVdN4v_exp(<4 x double> [[IN]])
+; LIBMVEC-X86-NEXT:    ret <4 x double> [[TMP1]]
+;
+; MASSV-LABEL: define {{[^@]+}}@exp_v4
+; MASSV-SAME: (<4 x double> [[IN:%.*]]) {
+; MASSV-NEXT:    [[CALL:%.*]] = call <4 x double> @llvm.exp.v4f64(<4 x double> [[IN]])
+; MASSV-NEXT:    ret <4 x double> [[CALL]]
+;
+; ACCELERATE-LABEL: define {{[^@]+}}@exp_v4
+; ACCELERATE-SAME: (<4 x double> [[IN:%.*]]) {
+; ACCELERATE-NEXT:    [[CALL:%.*]] = call <4 x double> @llvm.exp.v4f64(<4 x double> [[IN]])
+; ACCELERATE-NEXT:    ret <4 x double> [[CALL]]
+;
+  %call = call <4 x double> @llvm.exp.v4f64(<4 x double> %in)
+  ret <4 x double> %call
+}
+
+declare <4 x double> @llvm.exp.v4f64(<4 x double>) #0
+
+define <4 x float> @exp_f32(<4 x float> %in) {
+; SVML-LABEL: define {{[^@]+}}@exp_f32
+; SVML-SAME: (<4 x float> [[IN:%.*]]) {
+; SVML-NEXT:    [[TMP1:%.*]] = call <4 x float> @__svml_expf4(<4 x float> [[IN]])
+; SVML-NEXT:    ret <4 x float> [[TMP1]]
+;
+; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_f32
+; LIBMVEC-X86-SAME: (<4 x float> [[IN:%.*]]) {
+; LIBMVEC-X86-NEXT:    [[TMP1:%.*]] = call <4 x float> @_ZGVbN4v_expf(<4 x float> [[IN]])
+; LIBMVEC-X86-NEXT:    ret <4 x float> [[TMP1]]
+;
+; MASSV-LABEL: define {{[^@]+}}@exp_f32
+; MASSV-SAME: (<4 x float> [[IN:%.*]]) {
+; MASSV-NEXT:    [[TMP1:%.*]] = call <4 x float> @__expf4_massv(<4 x float> [[IN]])
+; MASSV-NEXT:    ret <4 x float> [[TMP1]]
+;
+; ACCELERATE-LABEL: define {{[^@]+}}@exp_f32
+; ACCELERATE-SAME: (<4 x float> [[IN:%.*]]) {
+; ACCELERATE-NEXT:    [[TMP1:%.*]] = call <4 x float> @vexpf(<4 x float> [[IN]])
+; ACCELERATE-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %call = call <4 x float> @llvm.exp.v4f32(<4 x float> %in)
+  ret <4 x float> %call
+}
+
+declare <4 x float> @llvm.exp.v4f32(<4 x float>) #0
+
+; No replacement should take place for a non-vector intrinsic.
+define double @exp_f64(double %in) {
+; COMMON-LABEL: define {{[^@]+}}@exp_f64
+; COMMON-SAME: (double [[IN:%.*]]) {
+; COMMON-NEXT:    [[CALL:%.*]] = call double @llvm.exp.f64(double [[IN]])
+; COMMON-NEXT:    ret double [[CALL]]
+;
+  %call = call double @llvm.exp.f64(double %in)
+  ret double %call
+}
+
+declare double @llvm.exp.f64(double) #0
+
+; Check that the pass works with scalar operands on
+; vector intrinsics. No vector library has a substitute for powi.
+define <4 x double> @powi_v4(<4 x double> %in){
+; COMMON-LABEL: define {{[^@]+}}@powi_v4
+; COMMON-SAME: (<4 x double> [[IN:%.*]]) {
+; COMMON-NEXT:    [[CALL:%.*]] = call <4 x double> @llvm.powi.v4f64(<4 x double> [[IN]], i32 3)
+; COMMON-NEXT:    ret <4 x double> [[CALL]]
+;
+  %call = call <4 x double> @llvm.powi.v4f64(<4 x double> %in, i32 3)
+  ret <4 x double> %call
+}
+
+declare <4 x double> @llvm.powi.v4f64(<4 x double>, i32) #0
+
+; Replacement should not take place if the vector length
+; does not match exactly.
+define <3 x double> @exp_v3(<3 x double> %in) {
+; COMMON-LABEL: define {{[^@]+}}@exp_v3
+; COMMON-SAME: (<3 x double> [[IN:%.*]]) {
+; COMMON-NEXT:    [[CALL:%.*]] = call <3 x double> @llvm.exp.v3f64(<3 x double> [[IN]])
+; COMMON-NEXT:    ret <3 x double> [[CALL]]
+;
+  %call = call <3 x double> @llvm.exp.v3f64(<3 x double> %in)
+  ret <3 x double> %call
+}
+
+declare <3 x double> @llvm.exp.v3f64(<3 x double>) #0
+
+attributes #0 = {nounwind readnone}
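
For the first SVML check in this test, the work the pass does boils
down to a TargetLibraryInfo query roughly like the one below (the
standalone setup is assumed; the scalar name and the expected
__svml_exp4 mapping are the ones the test checks):

  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/Triple.h"
  #include "llvm/Analysis/TargetLibraryInfo.h"

  static void lookupExample() {
    using namespace llvm;
    TargetLibraryInfoImpl TLII(Triple("x86_64-unknown-linux-gnu"));
    TLII.addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SVML);
    TargetLibraryInfo TLI(TLII);

    // Scalar name reconstructed from the intrinsic ID and element type,
    // plus the fixed vector width taken from the call's operands.
    StringRef ScalarName = "llvm.exp.f64";
    unsigned VF = 4;

    if (TLI.isFunctionVectorizable(ScalarName)) {
      StringRef VecName = TLI.getVectorizedFunction(ScalarName, VF);
      // Expected to be "__svml_exp4" here; an empty result means there
      // is no mapping and the intrinsic call is left untouched.
      (void)VecName;
    }
  }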

diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll
index 501a3badeab2..8c4112f5cd77 100644
--- a/llvm/test/CodeGen/X86/opt-pipeline.ll
+++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
@@ -51,6 +51,7 @@
 ; CHECK-NEXT:       Branch Probability Analysis
 ; CHECK-NEXT:       Block Frequency Analysis
 ; CHECK-NEXT:       Constant Hoisting
+; CHECK-NEXT:       Replace intrinsics with calls to vector library
 ; CHECK-NEXT:       Partially inline calls to library functions
 ; CHECK-NEXT:       Instrument function entry/exit with calls to e.g. mcount() (post inlining)
 ; CHECK-NEXT:       Scalarize Masked Memory Intrinsics

diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp
index 48f0adf7c726..7640aa4948c7 100644
--- a/llvm/tools/llc/llc.cpp
+++ b/llvm/tools/llc/llc.cpp
@@ -351,6 +351,7 @@ int main(int argc, char **argv) {
   initializeExpandReductionsPass(*Registry);
   initializeHardwareLoopsPass(*Registry);
   initializeTransformUtils(*Registry);
+  initializeReplaceWithVeclibLegacyPass(*Registry);
 
   // Initialize debugging passes.
   initializeScavengerTestPass(*Registry);

diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
index a45c575d1748..de8095a6fe37 100644
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@@ -574,6 +574,7 @@ int main(int argc, char **argv) {
   initializeWriteBitcodePassPass(Registry);
   initializeHardwareLoopsPass(Registry);
   initializeTypePromotionPass(Registry);
+  initializeReplaceWithVeclibLegacyPass(Registry);
 
 #ifdef BUILD_EXAMPLES
   initializeExampleIRTransforms(Registry);

diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
index 49cbd51de379..b6a14af84b90 100644
--- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
@@ -168,6 +168,7 @@ static_library("CodeGen") {
     "RegisterScavenging.cpp",
     "RegisterUsageInfo.cpp",
     "RenameIndependentSubregs.cpp",
+    "ReplaceWithVeclib.cpp",
     "ResetMachineFunctionPass.cpp",
     "SafeStack.cpp",
     "SafeStackLayout.cpp",


        

