[Mlir-commits] [mlir] 3e21fb6 - [MLIR] Generic 'malloc', 'aligned_alloc' and 'free' functions

Alex Zinenko llvmlistbot at llvm.org
Mon Jul 18 08:59:05 PDT 2022


Author: Michele Scuttari
Date: 2022-07-18T17:58:58+02:00
New Revision: 3e21fb616d9a1b29bf9d1a1ba484add633d6d5b3

URL: https://github.com/llvm/llvm-project/commit/3e21fb616d9a1b29bf9d1a1ba484add633d6d5b3
DIFF: https://github.com/llvm/llvm-project/commit/3e21fb616d9a1b29bf9d1a1ba484add633d6d5b3.diff

LOG: [MLIR] Generic 'malloc', 'aligned_alloc' and 'free' functions

When converted to the LLVM dialect, the memref.alloc and memref.free operations were generating calls to hardcoded 'malloc' and 'free' functions. This didn't leave any freedom to users to provide their custom implementation. Those operations now convert into calls to '_mlir_alloc' and '_mlir_free' functions, which have also been implemented into the runtime support library as wrappers to 'malloc' and 'free'. The same has been done for the 'aligned_alloc' function.

Reviewed By: ftynse

Differential Revision: https://reviews.llvm.org/D128791

Added: 
    mlir/examples/toy/Ch6/mlir/AllocRenamingPass.cpp
    mlir/examples/toy/Ch7/mlir/AllocRenamingPass.cpp

Modified: 
    mlir/docs/Tutorials/Toy/Ch-6.md
    mlir/examples/toy/Ch6/CMakeLists.txt
    mlir/examples/toy/Ch6/include/toy/Passes.h
    mlir/examples/toy/Ch6/toyc.cpp
    mlir/examples/toy/Ch7/CMakeLists.txt
    mlir/examples/toy/Ch7/include/toy/Passes.h
    mlir/examples/toy/Ch7/toyc.cpp
    mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h
    mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp
    mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
    mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp
    mlir/lib/ExecutionEngine/RunnerUtils.cpp
    mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
    mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir
    mlir/test/Conversion/AsyncToLLVM/convert-to-llvm.mlir
    mlir/test/Conversion/FuncToLLVM/calling-convention.mlir
    mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir
    mlir/test/Conversion/MemRefToLLVM/convert-static-memref-ops.mlir
    mlir/test/Target/LLVMIR/llvmir.mlir
    mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir
    mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
    mlir/test/mlir-cpu-runner/simple.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/docs/Tutorials/Toy/Ch-6.md b/mlir/docs/Tutorials/Toy/Ch-6.md
index 3605188d9999d..574b62ca68f3b 100644
--- a/mlir/docs/Tutorials/Toy/Ch-6.md
+++ b/mlir/docs/Tutorials/Toy/Ch-6.md
@@ -171,6 +171,16 @@ llvm.func @main() {
 }
 ```
 
+Even though not visible from the generated LLVM dialect, it must be noted that
+the conversion of the Memref dialect into the LLVM one does not produce calls
+to the `malloc` and `free` functions, but rather to the `_mlir_alloc` and
+`_mlir_free` functions. Their names have been intentionally kept different so
+that users can provide their own implementation by means of external libraries,
+thus allowing for different behaviour or profiling. For the sake of simplicity,
+this tutorial also includes a transformation pass converting them back to the
+well known `malloc` and `free` functions, thus partially hiding this complexity
+from newcomers.
+
 See [Conversion to the LLVM IR Dialect](../../ConversionToLLVMDialect.md) for
 more in-depth details on lowering to the LLVM dialect.
 

diff  --git a/mlir/examples/toy/Ch6/CMakeLists.txt b/mlir/examples/toy/Ch6/CMakeLists.txt
index 48094f56d6512..5fad31f6e59f4 100644
--- a/mlir/examples/toy/Ch6/CMakeLists.txt
+++ b/mlir/examples/toy/Ch6/CMakeLists.txt
@@ -16,6 +16,7 @@ add_public_tablegen_target(ToyCh6CombineIncGen)
 add_toy_chapter(toyc-ch6
   toyc.cpp
   parser/AST.cpp
+  mlir/AllocRenamingPass.cpp
   mlir/MLIRGen.cpp
   mlir/Dialect.cpp
   mlir/LowerToAffineLoops.cpp

diff  --git a/mlir/examples/toy/Ch6/include/toy/Passes.h b/mlir/examples/toy/Ch6/include/toy/Passes.h
index 62471dde46d85..907b425910154 100644
--- a/mlir/examples/toy/Ch6/include/toy/Passes.h
+++ b/mlir/examples/toy/Ch6/include/toy/Passes.h
@@ -29,6 +29,10 @@ std::unique_ptr<mlir::Pass> createLowerToAffinePass();
 /// well as `Affine` and `Std`, to the LLVM dialect for codegen.
 std::unique_ptr<mlir::Pass> createLowerToLLVMPass();
 
+/// Create a pass to rename the '_mlir_alloc' and '_mlir_free' functions to
+/// 'malloc' and 'free'.
+std::unique_ptr<mlir::Pass> createAllocRenamingPass();
+
 } // namespace toy
 } // namespace mlir
 

diff  --git a/mlir/examples/toy/Ch6/mlir/AllocRenamingPass.cpp b/mlir/examples/toy/Ch6/mlir/AllocRenamingPass.cpp
new file mode 100644
index 0000000000000..68809a330c4d2
--- /dev/null
+++ b/mlir/examples/toy/Ch6/mlir/AllocRenamingPass.cpp
@@ -0,0 +1,154 @@
+//====- AllocRenamingPass.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the renaming of '_mlir_alloc' and '_mlir_free' functions
+// respectively into 'malloc' and 'free', so that the Toy example doesn't have
+// to deal with runtime libraries to be linked.
+//
+//===----------------------------------------------------------------------===//
+
+#include "toy/Passes.h"
+
+#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h"
+#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/DialectConversion.h"
+#include "llvm/ADT/Sequence.h"
+
+using namespace mlir;
+
+//===----------------------------------------------------------------------===//
+// AllocRenamingPass RewritePatterns
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// Rename the '_mlir_alloc' function into 'malloc'
+class AllocFuncRenamePattern : public OpRewritePattern<LLVM::LLVMFuncOp> {
+public:
+  using OpRewritePattern<LLVM::LLVMFuncOp>::OpRewritePattern;
+
+  LogicalResult match(LLVM::LLVMFuncOp op) const override {
+    return LogicalResult::success(op.getName() == "_mlir_alloc");
+  }
+
+  void rewrite(LLVM::LLVMFuncOp op, PatternRewriter &rewriter) const override {
+    rewriter.replaceOpWithNewOp<LLVM::LLVMFuncOp>(
+        op, "malloc", op.getFunctionType(), op.getLinkage());
+  }
+};
+
+/// Rename the '_mlir_free' function into 'free'
+class FreeFuncRenamePattern : public OpRewritePattern<LLVM::LLVMFuncOp> {
+public:
+  using OpRewritePattern<LLVM::LLVMFuncOp>::OpRewritePattern;
+
+  LogicalResult match(LLVM::LLVMFuncOp op) const override {
+    return LogicalResult::success(op.getName() == "_mlir_free");
+  }
+
+  void rewrite(LLVM::LLVMFuncOp op, PatternRewriter &rewriter) const override {
+    rewriter.replaceOpWithNewOp<LLVM::LLVMFuncOp>(
+        op, "free", op.getFunctionType(), op.getLinkage());
+  }
+};
+
+/// Rename the calls to '_mlir_alloc' with calls to 'malloc'
+class AllocCallRenamePattern : public OpRewritePattern<LLVM::CallOp> {
+public:
+  using OpRewritePattern<LLVM::CallOp>::OpRewritePattern;
+
+  LogicalResult match(LLVM::CallOp op) const override {
+    auto callee = op.getCallee();
+
+    if (!callee)
+      return failure();
+
+    return LogicalResult::success(*callee == "_mlir_alloc");
+  }
+
+  void rewrite(LLVM::CallOp op, PatternRewriter &rewriter) const override {
+    rewriter.replaceOpWithNewOp<LLVM::CallOp>(op, op.getResultTypes(), "malloc",
+                                              op.getOperands());
+  }
+};
+
+/// Rename the calls to '_mlir_free' with calls to 'free'
+class FreeCallRenamePattern : public OpRewritePattern<LLVM::CallOp> {
+public:
+  using OpRewritePattern<LLVM::CallOp>::OpRewritePattern;
+
+  LogicalResult match(LLVM::CallOp op) const override {
+    auto callee = op.getCallee();
+
+    if (!callee)
+      return failure();
+
+    return LogicalResult::success(*callee == "_mlir_free");
+  }
+
+  void rewrite(LLVM::CallOp op, PatternRewriter &rewriter) const override {
+    rewriter.replaceOpWithNewOp<LLVM::CallOp>(op, op.getResultTypes(), "free",
+                                              op.getOperands());
+  }
+};
+} // namespace
+
+//===----------------------------------------------------------------------===//
+// AllocRenamingPass
+//===----------------------------------------------------------------------===//
+
+namespace {
+struct AllocRenamingPass
+    : public PassWrapper<AllocRenamingPass, OperationPass<ModuleOp>> {
+  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(AllocRenamingPass)
+
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registry.insert<LLVM::LLVMDialect>();
+  }
+  void runOnOperation() final;
+};
+} // namespace
+
+void AllocRenamingPass::runOnOperation() {
+  LLVMConversionTarget target(getContext());
+
+  target.addDynamicallyLegalOp<LLVM::LLVMFuncOp>([](LLVM::LLVMFuncOp op) {
+    auto name = op.getName();
+    return name != "_mlir_alloc" && name != "_mlir_free";
+  });
+
+  target.addDynamicallyLegalOp<LLVM::CallOp>([](LLVM::CallOp op) {
+    auto callee = op.getCallee();
+
+    if (!callee)
+      return true;
+
+    return *callee != "_mlir_alloc" && *callee != "_mlir_free";
+  });
+
+  target.markUnknownOpDynamicallyLegal(
+      [](mlir::Operation *op) { return true; });
+
+  RewritePatternSet patterns(&getContext());
+
+  patterns.add<AllocFuncRenamePattern>(&getContext());
+  patterns.add<FreeFuncRenamePattern>(&getContext());
+  patterns.add<AllocCallRenamePattern>(&getContext());
+  patterns.add<FreeCallRenamePattern>(&getContext());
+
+  auto module = getOperation();
+  if (failed(applyFullConversion(module, target, std::move(patterns))))
+    signalPassFailure();
+}
+
+/// Create a pass to rename the '_mlir_alloc' and '_mlir_free' functions to
+/// 'malloc' and 'free'.
+std::unique_ptr<mlir::Pass> mlir::toy::createAllocRenamingPass() {
+  return std::make_unique<AllocRenamingPass>();
+}

diff  --git a/mlir/examples/toy/Ch6/toyc.cpp b/mlir/examples/toy/Ch6/toyc.cpp
index 32261ec82ebf9..8a0a6fd6f58b4 100644
--- a/mlir/examples/toy/Ch6/toyc.cpp
+++ b/mlir/examples/toy/Ch6/toyc.cpp
@@ -171,6 +171,7 @@ int loadAndProcessMLIR(mlir::MLIRContext &context,
   if (isLoweringToLLVM) {
     // Finish lowering the toy IR to the LLVM dialect.
     pm.addPass(mlir::toy::createLowerToLLVMPass());
+    pm.addPass(mlir::toy::createAllocRenamingPass());
   }
 
   if (mlir::failed(pm.run(*module)))

diff  --git a/mlir/examples/toy/Ch7/CMakeLists.txt b/mlir/examples/toy/Ch7/CMakeLists.txt
index 8de18d37e3d82..ec670da565fbc 100644
--- a/mlir/examples/toy/Ch7/CMakeLists.txt
+++ b/mlir/examples/toy/Ch7/CMakeLists.txt
@@ -16,6 +16,7 @@ add_public_tablegen_target(ToyCh7CombineIncGen)
 add_toy_chapter(toyc-ch7
   toyc.cpp
   parser/AST.cpp
+  mlir/AllocRenamingPass.cpp
   mlir/MLIRGen.cpp
   mlir/Dialect.cpp
   mlir/LowerToAffineLoops.cpp

diff  --git a/mlir/examples/toy/Ch7/include/toy/Passes.h b/mlir/examples/toy/Ch7/include/toy/Passes.h
index 62471dde46d85..907b425910154 100644
--- a/mlir/examples/toy/Ch7/include/toy/Passes.h
+++ b/mlir/examples/toy/Ch7/include/toy/Passes.h
@@ -29,6 +29,10 @@ std::unique_ptr<mlir::Pass> createLowerToAffinePass();
 /// well as `Affine` and `Std`, to the LLVM dialect for codegen.
 std::unique_ptr<mlir::Pass> createLowerToLLVMPass();
 
+/// Create a pass to rename the '_mlir_alloc' and '_mlir_free' functions to
+/// 'malloc' and 'free'.
+std::unique_ptr<mlir::Pass> createAllocRenamingPass();
+
 } // namespace toy
 } // namespace mlir
 

diff  --git a/mlir/examples/toy/Ch7/mlir/AllocRenamingPass.cpp b/mlir/examples/toy/Ch7/mlir/AllocRenamingPass.cpp
new file mode 100644
index 0000000000000..68809a330c4d2
--- /dev/null
+++ b/mlir/examples/toy/Ch7/mlir/AllocRenamingPass.cpp
@@ -0,0 +1,154 @@
+//====- AllocRenamingPass.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the renaming of '_mlir_alloc' and '_mlir_free' functions
+// respectively into 'malloc' and 'free', so that the Toy example doesn't have
+// to deal with runtime libraries to be linked.
+//
+//===----------------------------------------------------------------------===//
+
+#include "toy/Passes.h"
+
+#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h"
+#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/DialectConversion.h"
+#include "llvm/ADT/Sequence.h"
+
+using namespace mlir;
+
+//===----------------------------------------------------------------------===//
+// AllocRenamingPass RewritePatterns
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// Rename the '_mlir_alloc' function into 'malloc'
+class AllocFuncRenamePattern : public OpRewritePattern<LLVM::LLVMFuncOp> {
+public:
+  using OpRewritePattern<LLVM::LLVMFuncOp>::OpRewritePattern;
+
+  LogicalResult match(LLVM::LLVMFuncOp op) const override {
+    return LogicalResult::success(op.getName() == "_mlir_alloc");
+  }
+
+  void rewrite(LLVM::LLVMFuncOp op, PatternRewriter &rewriter) const override {
+    rewriter.replaceOpWithNewOp<LLVM::LLVMFuncOp>(
+        op, "malloc", op.getFunctionType(), op.getLinkage());
+  }
+};
+
+/// Rename the '_mlir_free' function into 'free'
+class FreeFuncRenamePattern : public OpRewritePattern<LLVM::LLVMFuncOp> {
+public:
+  using OpRewritePattern<LLVM::LLVMFuncOp>::OpRewritePattern;
+
+  LogicalResult match(LLVM::LLVMFuncOp op) const override {
+    return LogicalResult::success(op.getName() == "_mlir_free");
+  }
+
+  void rewrite(LLVM::LLVMFuncOp op, PatternRewriter &rewriter) const override {
+    rewriter.replaceOpWithNewOp<LLVM::LLVMFuncOp>(
+        op, "free", op.getFunctionType(), op.getLinkage());
+  }
+};
+
+/// Rename the calls to '_mlir_alloc' with calls to 'malloc'
+class AllocCallRenamePattern : public OpRewritePattern<LLVM::CallOp> {
+public:
+  using OpRewritePattern<LLVM::CallOp>::OpRewritePattern;
+
+  LogicalResult match(LLVM::CallOp op) const override {
+    auto callee = op.getCallee();
+
+    if (!callee)
+      return failure();
+
+    return LogicalResult::success(*callee == "_mlir_alloc");
+  }
+
+  void rewrite(LLVM::CallOp op, PatternRewriter &rewriter) const override {
+    rewriter.replaceOpWithNewOp<LLVM::CallOp>(op, op.getResultTypes(), "malloc",
+                                              op.getOperands());
+  }
+};
+
+/// Rename the calls to '_mlir_free' with calls to 'free'
+class FreeCallRenamePattern : public OpRewritePattern<LLVM::CallOp> {
+public:
+  using OpRewritePattern<LLVM::CallOp>::OpRewritePattern;
+
+  LogicalResult match(LLVM::CallOp op) const override {
+    auto callee = op.getCallee();
+
+    if (!callee)
+      return failure();
+
+    return LogicalResult::success(*callee == "_mlir_free");
+  }
+
+  void rewrite(LLVM::CallOp op, PatternRewriter &rewriter) const override {
+    rewriter.replaceOpWithNewOp<LLVM::CallOp>(op, op.getResultTypes(), "free",
+                                              op.getOperands());
+  }
+};
+} // namespace
+
+//===----------------------------------------------------------------------===//
+// AllocRenamingPass
+//===----------------------------------------------------------------------===//
+
+namespace {
+struct AllocRenamingPass
+    : public PassWrapper<AllocRenamingPass, OperationPass<ModuleOp>> {
+  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(AllocRenamingPass)
+
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registry.insert<LLVM::LLVMDialect>();
+  }
+  void runOnOperation() final;
+};
+} // namespace
+
+void AllocRenamingPass::runOnOperation() {
+  LLVMConversionTarget target(getContext());
+
+  target.addDynamicallyLegalOp<LLVM::LLVMFuncOp>([](LLVM::LLVMFuncOp op) {
+    auto name = op.getName();
+    return name != "_mlir_alloc" && name != "_mlir_free";
+  });
+
+  target.addDynamicallyLegalOp<LLVM::CallOp>([](LLVM::CallOp op) {
+    auto callee = op.getCallee();
+
+    if (!callee)
+      return true;
+
+    return *callee != "_mlir_alloc" && *callee != "_mlir_free";
+  });
+
+  target.markUnknownOpDynamicallyLegal(
+      [](mlir::Operation *op) { return true; });
+
+  RewritePatternSet patterns(&getContext());
+
+  patterns.add<AllocFuncRenamePattern>(&getContext());
+  patterns.add<FreeFuncRenamePattern>(&getContext());
+  patterns.add<AllocCallRenamePattern>(&getContext());
+  patterns.add<FreeCallRenamePattern>(&getContext());
+
+  auto module = getOperation();
+  if (failed(applyFullConversion(module, target, std::move(patterns))))
+    signalPassFailure();
+}
+
+/// Create a pass to rename the '_mlir_alloc' and '_mlir_free' functions to
+/// 'malloc' and 'free'.
+std::unique_ptr<mlir::Pass> mlir::toy::createAllocRenamingPass() {
+  return std::make_unique<AllocRenamingPass>();
+}

diff  --git a/mlir/examples/toy/Ch7/toyc.cpp b/mlir/examples/toy/Ch7/toyc.cpp
index 2b8dc76f5993a..472b685d91a40 100644
--- a/mlir/examples/toy/Ch7/toyc.cpp
+++ b/mlir/examples/toy/Ch7/toyc.cpp
@@ -172,6 +172,7 @@ int loadAndProcessMLIR(mlir::MLIRContext &context,
   if (isLoweringToLLVM) {
     // Finish lowering the toy IR to the LLVM dialect.
     pm.addPass(mlir::toy::createLowerToLLVMPass());
+    pm.addPass(mlir::toy::createAllocRenamingPass());
   }
 
   if (mlir::failed(pm.run(*module)))

diff  --git a/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h b/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h
index 6380ff2d8e132..960570ff254d7 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h
@@ -45,6 +45,7 @@ LLVM::LLVMFuncOp lookupOrCreateMallocFn(ModuleOp moduleOp, Type indexType);
 LLVM::LLVMFuncOp lookupOrCreateAlignedAllocFn(ModuleOp moduleOp,
                                               Type indexType);
 LLVM::LLVMFuncOp lookupOrCreateFreeFn(ModuleOp moduleOp);
+LLVM::LLVMFuncOp lookupOrCreateAlignedFreeFn(ModuleOp moduleOp);
 LLVM::LLVMFuncOp lookupOrCreateMemRefCopyFn(ModuleOp moduleOp, Type indexType,
                                             Type unrankedDescriptorType);
 

diff  --git a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp
index 159a726cd9196..b5debeeb0a19d 100644
--- a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp
+++ b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp
@@ -399,7 +399,7 @@ class CoroFreeOpConversion : public OpConversionPattern<CoroFreeOp> {
 
     // Free the memory.
     auto freeFuncOp =
-        LLVM::lookupOrCreateFreeFn(op->getParentOfType<ModuleOp>());
+        LLVM::lookupOrCreateAlignedFreeFn(op->getParentOfType<ModuleOp>());
     rewriter.replaceOpWithNewOp<LLVM::CallOp>(op, TypeRange(),
                                               SymbolRefAttr::get(freeFuncOp),
                                               ValueRange(coroMem.getResult()));

diff  --git a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
index 6e363997ac16d..c48e5884a291e 100644
--- a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
+++ b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
@@ -315,6 +315,29 @@ struct DeallocOpLowering : public ConvertOpToLLVMPattern<memref::DeallocOp> {
   }
 };
 
+struct AlignedDeallocOpLowering
+    : public ConvertOpToLLVMPattern<memref::DeallocOp> {
+  using ConvertOpToLLVMPattern<memref::DeallocOp>::ConvertOpToLLVMPattern;
+
+  explicit AlignedDeallocOpLowering(LLVMTypeConverter &converter)
+      : ConvertOpToLLVMPattern<memref::DeallocOp>(converter) {}
+
+  LogicalResult
+  matchAndRewrite(memref::DeallocOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    // Insert the `free` declaration if it is not already present.
+    auto freeFunc =
+        LLVM::lookupOrCreateAlignedFreeFn(op->getParentOfType<ModuleOp>());
+    MemRefDescriptor memref(adaptor.memref());
+    Value casted = rewriter.create<LLVM::BitcastOp>(
+        op.getLoc(), getVoidPtrType(),
+        memref.allocatedPtr(rewriter, op.getLoc()));
+    rewriter.replaceOpWithNewOp<LLVM::CallOp>(
+        op, TypeRange(), SymbolRefAttr::get(freeFunc), casted);
+    return success();
+  }
+};
+
 // A `dim` is converted to a constant for static sizes and to an access to the
 // size stored in the memref descriptor for dynamic sizes.
 struct DimOpLowering : public ConvertOpToLLVMPattern<memref::DimOp> {
@@ -2026,7 +2049,7 @@ void mlir::populateMemRefToLLVMConversionPatterns(LLVMTypeConverter &converter,
   // clang-format on
   auto allocLowering = converter.getOptions().allocLowering;
   if (allocLowering == LowerToLLVMOptions::AllocLowering::AlignedAlloc)
-    patterns.add<AlignedAllocOpLowering, DeallocOpLowering>(converter);
+    patterns.add<AlignedAllocOpLowering, AlignedDeallocOpLowering>(converter);
   else if (allocLowering == LowerToLLVMOptions::AllocLowering::Malloc)
     patterns.add<AllocOpLowering, DeallocOpLowering>(converter);
 }

diff  --git a/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp b/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp
index c3f8fcb422402..e0dd5f6de84fe 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp
@@ -32,9 +32,10 @@ static constexpr llvm::StringRef kPrintOpen = "printOpen";
 static constexpr llvm::StringRef kPrintClose = "printClose";
 static constexpr llvm::StringRef kPrintComma = "printComma";
 static constexpr llvm::StringRef kPrintNewline = "printNewline";
-static constexpr llvm::StringRef kMalloc = "malloc";
-static constexpr llvm::StringRef kAlignedAlloc = "aligned_alloc";
-static constexpr llvm::StringRef kFree = "free";
+static constexpr llvm::StringRef kMalloc = "_mlir_alloc";
+static constexpr llvm::StringRef kAlignedAlloc = "_mlir_aligned_alloc";
+static constexpr llvm::StringRef kFree = "_mlir_free";
+static constexpr llvm::StringRef kAlignedFree = "_mlir_aligned_free";
 static constexpr llvm::StringRef kMemRefCopy = "memrefCopy";
 
 /// Generic print function lookupOrCreate helper.
@@ -115,6 +116,13 @@ LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateFreeFn(ModuleOp moduleOp) {
       LLVM::LLVMVoidType::get(moduleOp->getContext()));
 }
 
+LLVM::LLVMFuncOp mlir::LLVM::lookupOrCreateAlignedFreeFn(ModuleOp moduleOp) {
+  return LLVM::lookupOrCreateFn(
+      moduleOp, kAlignedFree,
+      LLVM::LLVMPointerType::get(IntegerType::get(moduleOp->getContext(), 8)),
+      LLVM::LLVMVoidType::get(moduleOp->getContext()));
+}
+
 LLVM::LLVMFuncOp
 mlir::LLVM::lookupOrCreateMemRefCopyFn(ModuleOp moduleOp, Type indexType,
                                        Type unrankedDescriptorType) {

diff  --git a/mlir/lib/ExecutionEngine/RunnerUtils.cpp b/mlir/lib/ExecutionEngine/RunnerUtils.cpp
index a9ad4a9b7d6e7..d5eae9451e398 100644
--- a/mlir/lib/ExecutionEngine/RunnerUtils.cpp
+++ b/mlir/lib/ExecutionEngine/RunnerUtils.cpp
@@ -16,8 +16,32 @@
 #include "mlir/ExecutionEngine/RunnerUtils.h"
 #include <chrono>
 
+#ifdef _MSC_VER
+#include "malloc.h"
+#endif
+
 // NOLINTBEGIN(*-identifier-naming)
 
+extern "C" void *_mlir_alloc(uint64_t size) { return malloc(size); }
+
+extern "C" void *_mlir_aligned_alloc(uint64_t alignment, uint64_t size) {
+#ifdef _MSC_VER
+  return _aligned_malloc(size, alignment);
+#else
+  return aligned_alloc(alignment, size);
+#endif
+}
+
+extern "C" void _mlir_free(void *ptr) { free(ptr); }
+
+extern "C" void _mlir_aligned_free(void *ptr) {
+#ifdef _MSC_VER
+  _aligned_free(ptr);
+#else
+  free(ptr);
+#endif
+}
+
 extern "C" void _mlir_ciface_printMemrefShapeI8(UnrankedMemRefType<int8_t> *M) {
   std::cout << "Unranked Memref ";
   printMemRefMetaData(std::cout, DynamicMemRefType<int8_t>(*M));

diff  --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index 4855571c05bd5..047c53fc9a305 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -1116,12 +1116,17 @@ prepareLLVMModule(Operation *m, llvm::LLVMContext &llvmContext,
           m->getAttr(LLVM::LLVMDialect::getTargetTripleAttrName()))
     llvmModule->setTargetTriple(targetTripleAttr.cast<StringAttr>().getValue());
 
-  // Inject declarations for `malloc` and `free` functions that can be used in
-  // memref allocation/deallocation coming from standard ops lowering.
+  // Inject declarations for `_mlir_alloc`, `_mlir_aligned_alloc` and
+  // `_mlir_free` functions that can be used in memref allocation / deallocation
+  // coming from standard ops lowering.
   llvm::IRBuilder<> builder(llvmContext);
-  llvmModule->getOrInsertFunction("malloc", builder.getInt8PtrTy(),
+  llvmModule->getOrInsertFunction("_mlir_alloc", builder.getInt8PtrTy(),
                                   builder.getInt64Ty());
-  llvmModule->getOrInsertFunction("free", builder.getVoidTy(),
+  llvmModule->getOrInsertFunction("_mlir_aligned_alloc", builder.getInt8PtrTy(),
+                                  builder.getInt64Ty(), builder.getInt64Ty());
+  llvmModule->getOrInsertFunction("_mlir_free", builder.getVoidTy(),
+                                  builder.getInt8PtrTy());
+  llvmModule->getOrInsertFunction("_mlir_aligned_free", builder.getVoidTy(),
                                   builder.getInt8PtrTy());
 
   return llvmModule;

diff  --git a/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir b/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir
index 404be5c7b3710..de67c817c52e9 100644
--- a/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir
+++ b/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir
@@ -21,7 +21,7 @@ func.func @coro_begin() {
   // CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i64) : i64
   // CHECK: %[[NEGATED_ALIGN:.*]] = llvm.sub %[[C0]], %[[ALIGN]]  : i64
   // CHECK: %[[ROUNDED_SIZE:.*]] = llvm.and %[[SIZE_PLUS_ALIGN_MINUS_ONE]], %[[NEGATED_ALIGN]] : i64
-  // CHECK: %[[ALLOC:.*]] = llvm.call @aligned_alloc(%[[ALIGN]], %[[ROUNDED_SIZE]])
+  // CHECK: %[[ALLOC:.*]] = llvm.call @_mlir_aligned_alloc(%[[ALIGN]], %[[ROUNDED_SIZE]])
   // CHECK: %[[HDL:.*]] = llvm.intr.coro.begin %[[ID]], %[[ALLOC]]
   %1 = async.coro.begin %0
   return
@@ -34,7 +34,7 @@ func.func @coro_free() {
   // CHECK: %[[HDL:.*]] = llvm.intr.coro.begin
   %1 = async.coro.begin %0
   // CHECK: %[[MEM:.*]] = llvm.intr.coro.free %[[ID]], %[[HDL]]
-  // CHECK: llvm.call @free(%[[MEM]])
+  // CHECK: llvm.call @_mlir_aligned_free(%[[MEM]])
   async.coro.free %0, %1
   return
 }

diff  --git a/mlir/test/Conversion/AsyncToLLVM/convert-to-llvm.mlir b/mlir/test/Conversion/AsyncToLLVM/convert-to-llvm.mlir
index 05bffc6967bd3..c6141ece955da 100644
--- a/mlir/test/Conversion/AsyncToLLVM/convert-to-llvm.mlir
+++ b/mlir/test/Conversion/AsyncToLLVM/convert-to-llvm.mlir
@@ -61,7 +61,7 @@ func.func @execute_no_async_args(%arg0: f32, %arg1: memref<1xf32>) {
 // Delete coroutine.
 // CHECK: ^[[CLEANUP]]:
 // CHECK: %[[MEM:.*]] = llvm.intr.coro.free
-// CHECK: llvm.call @free(%[[MEM]])
+// CHECK: llvm.call @_mlir_aligned_free(%[[MEM]])
 
 // Suspend coroutine, and also a return statement for ramp function.
 // CHECK: ^[[SUSPEND]]:

diff  --git a/mlir/test/Conversion/FuncToLLVM/calling-convention.mlir b/mlir/test/Conversion/FuncToLLVM/calling-convention.mlir
index 8dc5dac7f2d5f..410964b06d21a 100644
--- a/mlir/test/Conversion/FuncToLLVM/calling-convention.mlir
+++ b/mlir/test/Conversion/FuncToLLVM/calling-convention.mlir
@@ -135,7 +135,7 @@ func.func @return_var_memref_caller(%arg0: memref<4x3xf32>) {
   // CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOC_SIZE]] x i8
   // CHECK: %[[SOURCE:.*]] = llvm.extractvalue %[[CALL_RES]][1]
   // CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[SOURCE]], %[[ALLOC_SIZE]], %[[FALSE]])
-  // CHECK: llvm.call @free(%[[SOURCE]])
+  // CHECK: llvm.call @_mlir_free(%[[SOURCE]])
   // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
   // CHECK: %[[RANK:.*]] = llvm.extractvalue %[[CALL_RES]][0] : !llvm.struct<(i64, ptr<i8>)>
   // CHECK: %[[DESC_1:.*]] = llvm.insertvalue %[[RANK]], %[[DESC]][0]
@@ -167,7 +167,7 @@ func.func @return_var_memref(%arg0: memref<4x3xf32>) -> memref<*xf32> attributes
   // CHECK: %[[TABLES_SIZE:.*]] = llvm.mul %[[DOUBLE_RANK_INC]], %[[IDX_SIZE]]
   // CHECK: %[[ALLOC_SIZE:.*]] = llvm.add %[[DOUBLE_PTR_SIZE]], %[[TABLES_SIZE]]
   // CHECK: %[[FALSE:.*]] = llvm.mlir.constant(false)
-  // CHECK: %[[ALLOCATED:.*]] = llvm.call @malloc(%[[ALLOC_SIZE]])
+  // CHECK: %[[ALLOCATED:.*]] = llvm.call @_mlir_alloc(%[[ALLOC_SIZE]])
   // CHECK: "llvm.intr.memcpy"(%[[ALLOCATED]], %[[MEMORY]], %[[ALLOC_SIZE]], %[[FALSE]])
   // CHECK: %[[NEW_DESC:.*]] = llvm.mlir.undef : !llvm.struct<(i64, ptr<i8>)>
   // CHECK: %[[NEW_DESC_1:.*]] = llvm.insertvalue %[[RANK]], %[[NEW_DESC]][0]
@@ -193,7 +193,7 @@ func.func @return_two_var_memref_caller(%arg0: memref<4x3xf32>) {
   // CHECK: %[[ALLOCA_1:.*]] = llvm.alloca %{{.*}} x i8
   // CHECK: %[[SOURCE_1:.*]] = llvm.extractvalue %[[RES_1:.*]][1] : ![[DESC_TYPE:.*]]
   // CHECK: "llvm.intr.memcpy"(%[[ALLOCA_1]], %[[SOURCE_1]], %{{.*}}, %[[FALSE:.*]])
-  // CHECK: llvm.call @free(%[[SOURCE_1]])
+  // CHECK: llvm.call @_mlir_free(%[[SOURCE_1]])
   // CHECK: %[[DESC_1:.*]] = llvm.mlir.undef : ![[DESC_TYPE]]
   // CHECK: %[[DESC_11:.*]] = llvm.insertvalue %{{.*}}, %[[DESC_1]][0]
   // CHECK: llvm.insertvalue %[[ALLOCA_1]], %[[DESC_11]][1]
@@ -201,7 +201,7 @@ func.func @return_two_var_memref_caller(%arg0: memref<4x3xf32>) {
   // CHECK: %[[ALLOCA_2:.*]] = llvm.alloca %{{.*}} x i8
   // CHECK: %[[SOURCE_2:.*]] = llvm.extractvalue %[[RES_2:.*]][1]
   // CHECK: "llvm.intr.memcpy"(%[[ALLOCA_2]], %[[SOURCE_2]], %{{.*}}, %[[FALSE]])
-  // CHECK: llvm.call @free(%[[SOURCE_2]])
+  // CHECK: llvm.call @_mlir_free(%[[SOURCE_2]])
   // CHECK: %[[DESC_2:.*]] = llvm.mlir.undef : ![[DESC_TYPE]]
   // CHECK: %[[DESC_21:.*]] = llvm.insertvalue %{{.*}}, %[[DESC_2]][0]
   // CHECK: llvm.insertvalue %[[ALLOCA_2]], %[[DESC_21]][1]
@@ -222,13 +222,13 @@ func.func @return_two_var_memref(%arg0: memref<4x3xf32>) -> (memref<*xf32>, memr
   // separately, even if both operands are the same value. The calling
   // convention requires the caller to free them and the caller cannot know
   // whether they are the same value or not.
-  // CHECK: %[[ALLOCATED_1:.*]] = llvm.call @malloc(%{{.*}})
+  // CHECK: %[[ALLOCATED_1:.*]] = llvm.call @_mlir_alloc(%{{.*}})
   // CHECK: "llvm.intr.memcpy"(%[[ALLOCATED_1]], %[[MEMORY]], %{{.*}}, %[[FALSE:.*]])
   // CHECK: %[[RES_1:.*]] = llvm.mlir.undef
   // CHECK: %[[RES_11:.*]] = llvm.insertvalue %{{.*}}, %[[RES_1]][0]
   // CHECK: %[[RES_12:.*]] = llvm.insertvalue %[[ALLOCATED_1]], %[[RES_11]][1]
 
-  // CHECK: %[[ALLOCATED_2:.*]] = llvm.call @malloc(%{{.*}})
+  // CHECK: %[[ALLOCATED_2:.*]] = llvm.call @_mlir_alloc(%{{.*}})
   // CHECK: "llvm.intr.memcpy"(%[[ALLOCATED_2]], %[[MEMORY]], %{{.*}}, %[[FALSE]])
   // CHECK: %[[RES_2:.*]] = llvm.mlir.undef
   // CHECK: %[[RES_21:.*]] = llvm.insertvalue %{{.*}}, %[[RES_2]][0]

diff  --git a/mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir b/mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir
index 639d89976baf4..67c0e94572a33 100644
--- a/mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir
+++ b/mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir
@@ -14,7 +14,7 @@ func.func @mixed_alloc(%arg0: index, %arg1: index) -> memref<?x42x?xf32> {
 //  CHECK-NEXT:  %[[null:.*]] = llvm.mlir.null : !llvm.ptr<f32>
 //  CHECK-NEXT:  %[[gep:.*]] = llvm.getelementptr %[[null]][%[[sz]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
 //  CHECK-NEXT:  %[[sz_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<f32> to i64
-//  CHECK-NEXT:  llvm.call @malloc(%[[sz_bytes]]) : (i64) -> !llvm.ptr<i8>
+//  CHECK-NEXT:  llvm.call @_mlir_alloc(%[[sz_bytes]]) : (i64) -> !llvm.ptr<i8>
 //  CHECK-NEXT:  llvm.bitcast %{{.*}} : !llvm.ptr<i8> to !llvm.ptr<f32>
 //  CHECK-NEXT:  llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
 //  CHECK-NEXT:  llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
@@ -37,7 +37,7 @@ func.func @mixed_alloc(%arg0: index, %arg1: index) -> memref<?x42x?xf32> {
 func.func @mixed_dealloc(%arg0: memref<?x42x?xf32>) {
 //      CHECK:  %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<3 x i64>, array<3 x i64>)>
 // CHECK-NEXT:  %[[ptri8:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr<f32> to !llvm.ptr<i8>
-// CHECK-NEXT:  llvm.call @free(%[[ptri8]]) : (!llvm.ptr<i8>) -> ()
+// CHECK-NEXT:  llvm.call @_mlir_free(%[[ptri8]]) : (!llvm.ptr<i8>) -> ()
   memref.dealloc %arg0 : memref<?x42x?xf32>
   return
 }
@@ -54,7 +54,7 @@ func.func @dynamic_alloc(%arg0: index, %arg1: index) -> memref<?x?xf32> {
 //  CHECK-NEXT:  %[[null:.*]] = llvm.mlir.null : !llvm.ptr<f32>
 //  CHECK-NEXT:  %[[gep:.*]] = llvm.getelementptr %[[null]][%[[sz]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
 //  CHECK-NEXT:  %[[sz_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<f32> to i64
-//  CHECK-NEXT:  llvm.call @malloc(%[[sz_bytes]]) : (i64) -> !llvm.ptr<i8>
+//  CHECK-NEXT:  llvm.call @_mlir_alloc(%[[sz_bytes]]) : (i64) -> !llvm.ptr<i8>
 //  CHECK-NEXT:  llvm.bitcast %{{.*}} : !llvm.ptr<i8> to !llvm.ptr<f32>
 //  CHECK-NEXT:  llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
 //  CHECK-NEXT:  llvm.insertvalue %{{.*}}, %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
@@ -110,7 +110,7 @@ func.func @dynamic_alloca(%arg0: index, %arg1: index) -> memref<?x?xf32> {
 func.func @dynamic_dealloc(%arg0: memref<?x?xf32>) {
 //      CHECK:  %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
 // CHECK-NEXT:  %[[ptri8:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr<f32> to !llvm.ptr<i8>
-// CHECK-NEXT:  llvm.call @free(%[[ptri8]]) : (!llvm.ptr<i8>) -> ()
+// CHECK-NEXT:  llvm.call @_mlir_free(%[[ptri8]]) : (!llvm.ptr<i8>) -> ()
   memref.dealloc %arg0 : memref<?x?xf32>
   return
 }
@@ -128,24 +128,24 @@ func.func @stdlib_aligned_alloc(%N : index) -> memref<32x18xf32> {
 // ALIGNED-ALLOC-NEXT:  %[[gep:.*]] = llvm.getelementptr %[[null]][%[[num_elems]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
 // ALIGNED-ALLOC-NEXT:  %[[bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<f32> to i64
 // ALIGNED-ALLOC-NEXT:  %[[alignment:.*]] = llvm.mlir.constant(32 : index) : i64
-// ALIGNED-ALLOC-NEXT:  %[[allocated:.*]] = llvm.call @aligned_alloc(%[[alignment]], %[[bytes]]) : (i64, i64) -> !llvm.ptr<i8>
+// ALIGNED-ALLOC-NEXT:  %[[allocated:.*]] = llvm.call @_mlir_aligned_alloc(%[[alignment]], %[[bytes]]) : (i64, i64) -> !llvm.ptr<i8>
 // ALIGNED-ALLOC-NEXT:  llvm.bitcast %[[allocated]] : !llvm.ptr<i8> to !llvm.ptr<f32>
   %0 = memref.alloc() {alignment = 32} : memref<32x18xf32>
   // Do another alloc just to test that we have a unique declaration for
   // aligned_alloc.
-  // ALIGNED-ALLOC:  llvm.call @aligned_alloc
+  // ALIGNED-ALLOC:  llvm.call @_mlir_aligned_alloc
   %1 = memref.alloc() {alignment = 64} : memref<4096xf32>
 
   // Alignment is to element type boundaries (minimum 16 bytes).
   // ALIGNED-ALLOC:  %[[c32:.*]] = llvm.mlir.constant(32 : index) : i64
-  // ALIGNED-ALLOC-NEXT:  llvm.call @aligned_alloc(%[[c32]]
+  // ALIGNED-ALLOC-NEXT:  llvm.call @_mlir_aligned_alloc(%[[c32]]
   %2 = memref.alloc() : memref<4096xvector<8xf32>>
   // The minimum alignment is 16 bytes unless explicitly specified.
   // ALIGNED-ALLOC:  %[[c16:.*]] = llvm.mlir.constant(16 : index) : i64
-  // ALIGNED-ALLOC-NEXT:  llvm.call @aligned_alloc(%[[c16]],
+  // ALIGNED-ALLOC-NEXT:  llvm.call @_mlir_aligned_alloc(%[[c16]],
   %3 = memref.alloc() : memref<4096xvector<2xf32>>
   // ALIGNED-ALLOC:  %[[c8:.*]] = llvm.mlir.constant(8 : index) : i64
-  // ALIGNED-ALLOC-NEXT:  llvm.call @aligned_alloc(%[[c8]],
+  // ALIGNED-ALLOC-NEXT:  llvm.call @_mlir_aligned_alloc(%[[c8]],
   %4 = memref.alloc() {alignment = 8} : memref<1024xvector<4xf32>>
   // Bump the memref allocation size if its size is not a multiple of alignment.
   // ALIGNED-ALLOC:       %[[c32:.*]] = llvm.mlir.constant(32 : index) : i64
@@ -154,11 +154,11 @@ func.func @stdlib_aligned_alloc(%N : index) -> memref<32x18xf32> {
   // ALIGNED-ALLOC-NEXT:  llvm.add
   // ALIGNED-ALLOC-NEXT:  llvm.urem
   // ALIGNED-ALLOC-NEXT:  %[[SIZE_ALIGNED:.*]] = llvm.sub
-  // ALIGNED-ALLOC-NEXT:  llvm.call @aligned_alloc(%[[c32]], %[[SIZE_ALIGNED]])
+  // ALIGNED-ALLOC-NEXT:  llvm.call @_mlir_aligned_alloc(%[[c32]], %[[SIZE_ALIGNED]])
   %5 = memref.alloc() {alignment = 32} : memref<100xf32>
   // Bump alignment to the next power of two if it isn't.
   // ALIGNED-ALLOC:  %[[c128:.*]] = llvm.mlir.constant(128 : index) : i64
-  // ALIGNED-ALLOC:  llvm.call @aligned_alloc(%[[c128]]
+  // ALIGNED-ALLOC:  llvm.call @_mlir_aligned_alloc(%[[c128]]
   %6 = memref.alloc(%N) : memref<?xvector<18xf32>>
   return %0 : memref<32x18xf32>
 }
@@ -551,7 +551,7 @@ func.func @memref_of_memref() {
   // ALIGNED-ALLOC: llvm.mlir.constant(64 : index)
 
   // Check that the types are converted as expected.
-  // ALIGNED-ALLOC: llvm.call @aligned_alloc
+  // ALIGNED-ALLOC: llvm.call @_mlir_aligned_alloc
   // ALIGNED-ALLOC: llvm.bitcast %{{.*}} : !llvm.ptr<i8> to
   // ALIGNED-ALLOC-SAME: !llvm.
   // ALIGNED-ALLOC-SAME: [[INNER:ptr<struct<\(ptr<f32>, ptr<f32>, i64, array<1 x i64>, array<1 x i64>\)>>]]
@@ -576,7 +576,7 @@ module attributes { dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<index, 32>> } {
     // ALIGNED-ALLOC: llvm.mlir.constant(32 : index)
 
     // Check that the types are converted as expected.
-    // ALIGNED-ALLOC: llvm.call @aligned_alloc
+    // ALIGNED-ALLOC: llvm.call @_mlir_aligned_alloc
     // ALIGNED-ALLOC: llvm.bitcast %{{.*}} : !llvm.ptr<i8> to
     // ALIGNED-ALLOC-SAME: !llvm.
     // ALIGNED-ALLOC-SAME: [[INNER:ptr<struct<\(ptr<f32>, ptr<f32>, i32, array<1 x i32>, array<1 x i32>\)>>]]
@@ -606,7 +606,7 @@ func.func @memref_of_memref_of_memref() {
   // Static alignment should be computed as ceilPowerOf2(2 * sizeof(pointer) +
   // (1 + 2 * rank) * sizeof(index) = ceilPowerOf2(2 * 8 + 3 * 8) = 64.
   // ALIGNED-ALLOC: llvm.mlir.constant(64 : index)
-  // ALIGNED-ALLOC: llvm.call @aligned_alloc
+  // ALIGNED-ALLOC: llvm.call @_mlir_aligned_alloc
   %0 = memref.alloc() : memref<1 x memref<2 x memref<3 x f32>>>
   return
 }
@@ -623,7 +623,7 @@ func.func @ranked_unranked() {
   // Static alignment should be computed as ceilPowerOf2(sizeof(index) +
   // sizeof(pointer)) = 16.
   // ALIGNED-ALLOC: llvm.mlir.constant(16 : index)
-  // ALIGNED-ALLOC: llvm.call @aligned_alloc
+  // ALIGNED-ALLOC: llvm.call @_mlir_aligned_alloc
   // ALIGNED-ALLOC: llvm.bitcast
   // ALIGNED-ALLOC-SAME: !llvm.ptr<i8> to !llvm.[[INNER]]
   %0 = memref.alloc() : memref<1 x memref<* x f32>>

diff  --git a/mlir/test/Conversion/MemRefToLLVM/convert-static-memref-ops.mlir b/mlir/test/Conversion/MemRefToLLVM/convert-static-memref-ops.mlir
index 1296f8e4881cd..01a1499fe5901 100644
--- a/mlir/test/Conversion/MemRefToLLVM/convert-static-memref-ops.mlir
+++ b/mlir/test/Conversion/MemRefToLLVM/convert-static-memref-ops.mlir
@@ -6,7 +6,7 @@ func.func @zero_d_alloc() -> memref<f32> {
 // CHECK: %[[null:.*]] = llvm.mlir.null : !llvm.ptr<f32>
 // CHECK: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
 // CHECK: %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<f32> to i64
-// CHECK: llvm.call @malloc(%[[size_bytes]]) : (i64) -> !llvm.ptr<i8>
+// CHECK: llvm.call @_mlir_alloc(%[[size_bytes]]) : (i64) -> !llvm.ptr<i8>
 // CHECK: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<i8> to !llvm.ptr<f32>
 // CHECK: llvm.mlir.undef : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
 // CHECK: llvm.insertvalue %[[ptr]], %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
@@ -26,7 +26,7 @@ func.func @zero_d_dealloc(%arg0: memref<f32>) {
 // CHECK: unrealized_conversion_cast
 // CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64)>
 // CHECK: %[[bc:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr<f32> to !llvm.ptr<i8>
-// CHECK: llvm.call @free(%[[bc]]) : (!llvm.ptr<i8>) -> ()
+// CHECK: llvm.call @_mlir_free(%[[bc]]) : (!llvm.ptr<i8>) -> ()
 
   memref.dealloc %arg0 : memref<f32>
   return
@@ -43,7 +43,7 @@ func.func @aligned_1d_alloc() -> memref<42xf32> {
 // CHECK: %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<f32> to i64
 // CHECK: %[[alignment:.*]] = llvm.mlir.constant(8 : index) : i64
 // CHECK: %[[allocsize:.*]] = llvm.add %[[size_bytes]], %[[alignment]] : i64
-// CHECK: %[[allocated:.*]] = llvm.call @malloc(%[[allocsize]]) : (i64) -> !llvm.ptr<i8>
+// CHECK: %[[allocated:.*]] = llvm.call @_mlir_alloc(%[[allocsize]]) : (i64) -> !llvm.ptr<i8>
 // CHECK: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<i8> to !llvm.ptr<f32>
 // CHECK: %[[allocatedAsInt:.*]] = llvm.ptrtoint %[[ptr]] : !llvm.ptr<f32> to i64
 // CHECK: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : i64
@@ -69,7 +69,7 @@ func.func @static_alloc() -> memref<32x18xf32> {
 // CHECK: %[[null:.*]] = llvm.mlir.null : !llvm.ptr<f32>
 // CHECK: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[num_elems]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
 // CHECK: %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<f32> to i64
-// CHECK: %[[allocated:.*]] = llvm.call @malloc(%[[size_bytes]]) : (i64) -> !llvm.ptr<i8>
+// CHECK: %[[allocated:.*]] = llvm.call @_mlir_alloc(%[[size_bytes]]) : (i64) -> !llvm.ptr<i8>
 // CHECK: llvm.bitcast %[[allocated]] : !llvm.ptr<i8> to !llvm.ptr<f32>
  %0 = memref.alloc() : memref<32x18xf32>
  return %0 : memref<32x18xf32>
@@ -106,7 +106,7 @@ func.func @static_alloca() -> memref<32x18xf32> {
 func.func @static_dealloc(%static: memref<10x8xf32>) {
 // CHECK: %[[ptr:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
 // CHECK: %[[bc:.*]] = llvm.bitcast %[[ptr]] : !llvm.ptr<f32> to !llvm.ptr<i8>
-// CHECK: llvm.call @free(%[[bc]]) : (!llvm.ptr<i8>) -> ()
+// CHECK: llvm.call @_mlir_free(%[[bc]]) : (!llvm.ptr<i8>) -> ()
   memref.dealloc %static : memref<10x8xf32>
   return
 }
@@ -206,7 +206,7 @@ module attributes { dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<index, 32>> } {
     // CHECK: llvm.ptrtoint %{{.*}} : !llvm.ptr<{{.*}}> to i32
     // CHECK: llvm.ptrtoint %{{.*}} : !llvm.ptr<{{.*}}> to i32
     // CHECK: llvm.add %{{.*}} : i32
-    // CHECK: llvm.call @malloc(%{{.*}}) : (i32) -> !llvm.ptr
+    // CHECK: llvm.call @_mlir_alloc(%{{.*}}) : (i32) -> !llvm.ptr
     // CHECK: llvm.ptrtoint %{{.*}} : !llvm.ptr<{{.*}}> to i32
     // CHECK: llvm.sub {{.*}} : i32
     // CHECK: llvm.add {{.*}} : i32

diff  --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir
index 9a30a9d4a719a..1668da4b83ee7 100644
--- a/mlir/test/Target/LLVMIR/llvmir.mlir
+++ b/mlir/test/Target/LLVMIR/llvmir.mlir
@@ -147,9 +147,9 @@ llvm.mlir.global internal constant @sectionvar("teststring")  {section = ".mysec
 // inserted before other functions in the module.
 //
 
-// CHECK: declare ptr @malloc(i64)
-llvm.func @malloc(i64) -> !llvm.ptr<i8>
-// CHECK: declare void @free(ptr)
+// CHECK: declare ptr @_mlir_alloc(i64)
+llvm.func @_mlir_alloc(i64) -> !llvm.ptr<i8>
+// CHECK: declare void @_mlir_free(ptr)
 
 
 //
@@ -499,7 +499,7 @@ llvm.func @dso_local_func() attributes {dso_local} {
 
 // CHECK-LABEL: define void @memref_alloc()
 llvm.func @memref_alloc() {
-// CHECK-NEXT: %{{[0-9]+}} = call ptr @malloc(i64 400)
+// CHECK-NEXT: %{{[0-9]+}} = call ptr @_mlir_alloc(i64 400)
 // CHECK-NEXT: %{{[0-9]+}} = insertvalue { ptr } undef, ptr %{{[0-9]+}}, 0
   %0 = llvm.mlir.constant(10 : index) : i64
   %1 = llvm.mlir.constant(10 : index) : i64
@@ -507,7 +507,7 @@ llvm.func @memref_alloc() {
   %3 = llvm.mlir.undef : !llvm.struct<(ptr<f32>)>
   %4 = llvm.mlir.constant(4 : index) : i64
   %5 = llvm.mul %2, %4 : i64
-  %6 = llvm.call @malloc(%5) : (i64) -> !llvm.ptr<i8>
+  %6 = llvm.call @_mlir_alloc(%5) : (i64) -> !llvm.ptr<i8>
   %7 = llvm.bitcast %6 : !llvm.ptr<i8> to !llvm.ptr<f32>
   %8 = llvm.insertvalue %7, %3[0] : !llvm.struct<(ptr<f32>)>
 // CHECK-NEXT: ret void
@@ -520,13 +520,13 @@ llvm.func @get_index() -> i64
 // CHECK-LABEL: define void @store_load_static()
 llvm.func @store_load_static() {
 ^bb0:
-// CHECK-NEXT: %{{[0-9]+}} = call ptr @malloc(i64 40)
+// CHECK-NEXT: %{{[0-9]+}} = call ptr @_mlir_alloc(i64 40)
 // CHECK-NEXT: %{{[0-9]+}} = insertvalue { ptr } undef, ptr %{{[0-9]+}}, 0
   %0 = llvm.mlir.constant(10 : index) : i64
   %1 = llvm.mlir.undef : !llvm.struct<(ptr<f32>)>
   %2 = llvm.mlir.constant(4 : index) : i64
   %3 = llvm.mul %0, %2 : i64
-  %4 = llvm.call @malloc(%3) : (i64) -> !llvm.ptr<i8>
+  %4 = llvm.call @_mlir_alloc(%3) : (i64) -> !llvm.ptr<i8>
   %5 = llvm.bitcast %4 : !llvm.ptr<i8> to !llvm.ptr<f32>
   %6 = llvm.insertvalue %5, %1[0] : !llvm.struct<(ptr<f32>)>
   %7 = llvm.mlir.constant(1.000000e+00 : f32) : f32
@@ -587,13 +587,13 @@ llvm.func @store_load_static() {
 // CHECK-LABEL: define void @store_load_dynamic(i64 {{%.*}})
 llvm.func @store_load_dynamic(%arg0: i64) {
 // CHECK-NEXT: %{{[0-9]+}} = mul i64 %{{[0-9]+}}, 4
-// CHECK-NEXT: %{{[0-9]+}} = call ptr @malloc(i64 %{{[0-9]+}})
+// CHECK-NEXT: %{{[0-9]+}} = call ptr @_mlir_alloc(i64 %{{[0-9]+}})
 // CHECK-NEXT: %{{[0-9]+}} = insertvalue { ptr, i64 } undef, ptr %{{[0-9]+}}, 0
 // CHECK-NEXT: %{{[0-9]+}} = insertvalue { ptr, i64 } %{{[0-9]+}}, i64 %{{[0-9]+}}, 1
   %0 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, i64)>
   %1 = llvm.mlir.constant(4 : index) : i64
   %2 = llvm.mul %arg0, %1 : i64
-  %3 = llvm.call @malloc(%2) : (i64) -> !llvm.ptr<i8>
+  %3 = llvm.call @_mlir_alloc(%2) : (i64) -> !llvm.ptr<i8>
   %4 = llvm.bitcast %3 : !llvm.ptr<i8> to !llvm.ptr<f32>
   %5 = llvm.insertvalue %4, %0[0] : !llvm.struct<(ptr<f32>, i64)>
   %6 = llvm.insertvalue %arg0, %5[1] : !llvm.struct<(ptr<f32>, i64)>
@@ -660,7 +660,7 @@ llvm.func @store_load_mixed(%arg0: i64) {
 // CHECK-NEXT: %{{[0-9]+}} = mul i64 %{{[0-9]+}}, 4
 // CHECK-NEXT: %{{[0-9]+}} = mul i64 %{{[0-9]+}}, 10
 // CHECK-NEXT: %{{[0-9]+}} = mul i64 %{{[0-9]+}}, 4
-// CHECK-NEXT: %{{[0-9]+}} = call ptr @malloc(i64 %{{[0-9]+}})
+// CHECK-NEXT: %{{[0-9]+}} = call ptr @_mlir_alloc(i64 %{{[0-9]+}})
 // CHECK-NEXT: %{{[0-9]+}} = insertvalue { ptr, i64, i64 } undef, ptr %{{[0-9]+}}, 0
 // CHECK-NEXT: %{{[0-9]+}} = insertvalue { ptr, i64, i64 } %{{[0-9]+}}, i64 %{{[0-9]+}}, 1
 // CHECK-NEXT: %{{[0-9]+}} = insertvalue { ptr, i64, i64 } %{{[0-9]+}}, i64 10, 2
@@ -672,7 +672,7 @@ llvm.func @store_load_mixed(%arg0: i64) {
   %6 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, i64, i64)>
   %7 = llvm.mlir.constant(4 : index) : i64
   %8 = llvm.mul %5, %7 : i64
-  %9 = llvm.call @malloc(%8) : (i64) -> !llvm.ptr<i8>
+  %9 = llvm.call @_mlir_alloc(%8) : (i64) -> !llvm.ptr<i8>
   %10 = llvm.bitcast %9 : !llvm.ptr<i8> to !llvm.ptr<f32>
   %11 = llvm.insertvalue %10, %6[0] : !llvm.struct<(ptr<f32>, i64, i64)>
   %12 = llvm.insertvalue %arg0, %11[1] : !llvm.struct<(ptr<f32>, i64, i64)>
@@ -773,7 +773,7 @@ llvm.func @memref_args_rets(%arg0: !llvm.struct<(ptr<f32>)>, %arg1: !llvm.struct
   llvm.store %2, %14 : !llvm.ptr<f32>
 // CHECK-NEXT: %{{[0-9]+}} = mul i64 10, %{{[0-9]+}}
 // CHECK-NEXT: %{{[0-9]+}} = mul i64 %{{[0-9]+}}, 4
-// CHECK-NEXT: %{{[0-9]+}} = call ptr @malloc(i64 %{{[0-9]+}})
+// CHECK-NEXT: %{{[0-9]+}} = call ptr @_mlir_alloc(i64 %{{[0-9]+}})
 // CHECK-NEXT: %{{[0-9]+}} = insertvalue { ptr, i64 } undef, ptr %{{[0-9]+}}, 0
 // CHECK-NEXT: %{{[0-9]+}} = insertvalue { ptr, i64 } %{{[0-9]+}}, i64 %{{[0-9]+}}, 1
   %15 = llvm.mlir.constant(10 : index) : i64
@@ -781,7 +781,7 @@ llvm.func @memref_args_rets(%arg0: !llvm.struct<(ptr<f32>)>, %arg1: !llvm.struct
   %17 = llvm.mlir.undef : !llvm.struct<(ptr<f32>, i64)>
   %18 = llvm.mlir.constant(4 : index) : i64
   %19 = llvm.mul %16, %18 : i64
-  %20 = llvm.call @malloc(%19) : (i64) -> !llvm.ptr<i8>
+  %20 = llvm.call @_mlir_alloc(%19) : (i64) -> !llvm.ptr<i8>
   %21 = llvm.bitcast %20 : !llvm.ptr<i8> to !llvm.ptr<f32>
   %22 = llvm.insertvalue %21, %17[0] : !llvm.struct<(ptr<f32>, i64)>
   %23 = llvm.insertvalue %1, %22[1] : !llvm.struct<(ptr<f32>, i64)>

diff  --git a/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir b/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir
index 3168b5829c116..285b9856c749b 100644
--- a/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir
+++ b/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir
@@ -1,4 +1,6 @@
-// RUN: mlir-opt %s -pass-pipeline="func.func(convert-scf-to-cf,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm{use-bare-ptr-memref-call-conv=1}" -reconcile-unrealized-casts | mlir-cpu-runner -shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext -entry-point-result=void | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline="func.func(convert-scf-to-cf,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm{use-bare-ptr-memref-call-conv=1}" -reconcile-unrealized-casts \
+// RUN: | mlir-cpu-runner -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext -entry-point-result=void \
+// RUN: | FileCheck %s
 
 // Verify bare pointer memref calling convention. `simple_add1_add2_test`
 // gets two 2xf32 memrefs, adds 1.0f to the first one and 2.0f to the second
@@ -26,8 +28,8 @@ func.func @simple_add1_add2_test(%arg0: memref<2xf32>, %arg1: memref<2xf32>) {
 }
 
 // External declarations.
-llvm.func @malloc(i64) -> !llvm.ptr<i8>
-llvm.func @free(!llvm.ptr<i8>)
+llvm.func @_mlir_alloc(i64) -> !llvm.ptr<i8>
+llvm.func @_mlir_free(!llvm.ptr<i8>)
 func.func private @printF32(%arg0: f32)
 func.func private @printComma()
 func.func private @printNewline()

diff  --git a/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir b/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
index 57ca74f870bd4..488bcec50597d 100644
--- a/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
+++ b/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
@@ -1,4 +1,6 @@
-// RUN: mlir-opt -pass-pipeline="func.func(convert-linalg-to-loops,lower-affine,convert-scf-to-cf,convert-arith-to-llvm),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" %s | mlir-cpu-runner -O3 -e main -entry-point-result=void -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %s
+// RUN: mlir-opt -pass-pipeline="func.func(convert-linalg-to-loops,lower-affine,convert-scf-to-cf,convert-arith-to-llvm),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" %s \
+// RUN: | mlir-cpu-runner -O3 -e main -entry-point-result=void -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | FileCheck %s
 
 func.func @main() {
   %A = memref.alloc() : memref<16x16xf32>

diff  --git a/mlir/test/mlir-cpu-runner/simple.mlir b/mlir/test/mlir-cpu-runner/simple.mlir
index 72d241439d06e..5bf7c10e5db32 100644
--- a/mlir/test/mlir-cpu-runner/simple.mlir
+++ b/mlir/test/mlir-cpu-runner/simple.mlir
@@ -1,22 +1,36 @@
-// RUN: mlir-cpu-runner %s | FileCheck %s
-// RUN: mlir-cpu-runner %s -e foo | FileCheck -check-prefix=NOMAIN %s
-// RUN: mlir-cpu-runner %s --entry-point-result=i32 -e int32_main | FileCheck -check-prefix=INT32MAIN %s
-// RUN: mlir-cpu-runner %s --entry-point-result=i64 -e int64_main | FileCheck -check-prefix=INT64MAIN %s
-// RUN: mlir-cpu-runner %s -O3 | FileCheck %s
+// RUN: mlir-cpu-runner %s \
+// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+// RUN: mlir-cpu-runner %s -e foo \
+// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | FileCheck -check-prefix=NOMAIN %s
+
+// RUN: mlir-cpu-runner %s --entry-point-result=i32 -e int32_main \
+// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | FileCheck -check-prefix=INT32MAIN %s
+
+// RUN: mlir-cpu-runner %s --entry-point-result=i64 -e int64_main \
+// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | FileCheck -check-prefix=INT64MAIN %s
+
+// RUN: mlir-cpu-runner %s -O3 \
+// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | FileCheck %s
 
 // RUN: cp %s %t
-// RUN: mlir-cpu-runner %t -dump-object-file | FileCheck %t
+// RUN: mlir-cpu-runner %t -dump-object-file -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %t
 // RUN: ls %t.o
 // RUN: rm %t.o
 
-// RUN: mlir-cpu-runner %s -dump-object-file -object-filename=%T/test.o | FileCheck %s
+// RUN: mlir-cpu-runner %s -dump-object-file -object-filename=%T/test.o -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | FileCheck %s
 // RUN: ls %T/test.o
 // RUN: rm %T/test.o
 
 // Declarations of C library functions.
 llvm.func @fabsf(f32) -> f32
-llvm.func @malloc(i64) -> !llvm.ptr<i8>
-llvm.func @free(!llvm.ptr<i8>)
+llvm.func @_mlir_alloc(i64) -> !llvm.ptr<i8>
+llvm.func @_mlir_free(!llvm.ptr<i8>)
 
 // Check that a simple function with a nested call works.
 llvm.func @main() -> f32 {
@@ -26,16 +40,16 @@ llvm.func @main() -> f32 {
 }
 // CHECK: 4.200000e+02
 
-// Helper typed functions wrapping calls to "malloc" and "free".
+// Helper typed functions wrapping calls to "_mlir_alloc" and "_mlir_free".
 llvm.func @allocation() -> !llvm.ptr<f32> {
   %0 = llvm.mlir.constant(4 : index) : i64
-  %1 = llvm.call @malloc(%0) : (i64) -> !llvm.ptr<i8>
+  %1 = llvm.call @_mlir_alloc(%0) : (i64) -> !llvm.ptr<i8>
   %2 = llvm.bitcast %1 : !llvm.ptr<i8> to !llvm.ptr<f32>
   llvm.return %2 : !llvm.ptr<f32>
 }
 llvm.func @deallocation(%arg0: !llvm.ptr<f32>) {
   %0 = llvm.bitcast %arg0 : !llvm.ptr<f32> to !llvm.ptr<i8>
-  llvm.call @free(%0) : (!llvm.ptr<i8>) -> ()
+  llvm.call @_mlir_free(%0) : (!llvm.ptr<i8>) -> ()
   llvm.return
 }
 


        


More information about the Mlir-commits mailing list