[llvm] bb15861 - [MemCpyOpt] Relax libcall checks

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 4 12:17:59 PDT 2021


Author: Nikita Popov
Date: 2021-08-04T21:17:51+02:00
New Revision: bb15861e149ac1ce01ed5bd87f760e9469b20a9d

URL: https://github.com/llvm/llvm-project/commit/bb15861e149ac1ce01ed5bd87f760e9469b20a9d
DIFF: https://github.com/llvm/llvm-project/commit/bb15861e149ac1ce01ed5bd87f760e9469b20a9d.diff

LOG: [MemCpyOpt] Relax libcall checks

Rather than blocking the whole MemCpyOpt pass when the libcalls are
not available, only disable the creation of new memset/memcpy
intrinsics in places where previously only loads and stores were used.
This affects only the store merging and load-store conversion
optimizations. The other optimizations operate on existing intrinsics,
which are well-defined in the absence of libcalls -- not having the
libcalls just means that call simplification won't convert them into
intrinsics.
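
For illustration, a minimal self-contained sketch of the guard pattern
(assumed helper name and parameters, not code from this patch -- the
actual changes are in the diff below):

  // Sketch: only form a new llvm.memset out of plain stores when the
  // memset libcall is available, because the intrinsic may later be
  // legalized into that very call.
  #include "llvm/Analysis/TargetLibraryInfo.h"
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/Support/Alignment.h"
  using namespace llvm;

  static bool tryEmitMemSetSketch(IRBuilder<> &Builder,
                                  const TargetLibraryInfo &TLI, Value *Ptr,
                                  uint64_t NumBytes, Align Alignment) {
    if (!TLI.has(LibFunc_memset))
      return false; // Keep the original stores; don't create the intrinsic.
    Builder.CreateMemSet(Ptr, Builder.getInt8(0), NumBytes, Alignment);
    return true;
  }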

This is a weaker variation of D104801, which dropped these checks
entirely. Ideally we would not couple the emission of intrinsics to
libcall availability at all, but since the intrinsics may be legalized
into libcalls, we need to be a bit careful for now.
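
The availability query itself goes through TargetLibraryInfo; a sketch
of assumed standalone usage (the pass does this via TLI->has(), and the
new test below exercises a target where these libcalls are unavailable):

  // Sketch: libcall availability is a per-target property. On amdgcn--
  // (used by the new test) the memory libcalls are treated as
  // unavailable, so the pass now keeps the original loads/stores there.
  #include "llvm/ADT/Triple.h"
  #include "llvm/Analysis/TargetLibraryInfo.h"
  using namespace llvm;

  static bool hasMemIntrinsicLibcalls(const Triple &T) {
    TargetLibraryInfoImpl TLII(T);
    TargetLibraryInfo TLI(TLII);
    return TLI.has(LibFunc_memset) && TLI.has(LibFunc_memcpy) &&
           TLI.has(LibFunc_memmove);
  }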

Differential Revision: https://reviews.llvm.org/D106769

Added: 
    llvm/test/Transforms/MemCpyOpt/no-libcalls.ll

Modified: 
    llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 2e36c50b75fc0..6ad041768e205 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -673,7 +673,12 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
         LI->getParent() == SI->getParent()) {
 
       auto *T = LI->getType();
-      if (T->isAggregateType()) {
+      // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
+      // the corresponding libcalls are not available.
+      // TODO: We should really distinguish between libcall availability and
+      // our ability to introduce intrinsics.
+      if (T->isAggregateType() && TLI->has(LibFunc_memcpy) &&
+          TLI->has(LibFunc_memmove)) {
         MemoryLocation LoadLoc = MemoryLocation::get(LI);
 
         // We use alias analysis to check if an instruction may store to
@@ -796,6 +801,13 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
     }
   }
 
+  // The following code creates memset intrinsics out of thin air. Don't do
+  // this if the corresponding libfunc is not available.
+  // TODO: We should really distinguish between libcall availability and
+  // our ability to introduce intrinsics.
+  if (!TLI->has(LibFunc_memset))
+    return false;
+
   // There are two cases that are interesting for this code to handle: memcpy
   // and memset.  Right now we only handle memset.
 
@@ -1548,9 +1560,6 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
 /// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
 /// not to alias.
 bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
-  if (!TLI->has(LibFunc_memmove))
-    return false;
-
   // See if the pointers alias.
   if (!AA->isNoAlias(MemoryLocation::getForDest(M),
                      MemoryLocation::getForSource(M)))
@@ -1754,11 +1763,6 @@ bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_,
   MSSA = MSSA_;
   MemorySSAUpdater MSSAU_(MSSA_);
   MSSAU = MSSA_ ? &MSSAU_ : nullptr;
-  // If we don't have at least memset and memcpy, there is little point of doing
-  // anything here.  These are required by a freestanding implementation, so if
-  // even they are disabled, there is no point in trying hard.
-  if (!TLI->has(LibFunc_memset) || !TLI->has(LibFunc_memcpy))
-    return false;
 
   while (true) {
     if (!iterateOnFunction(F))

diff --git a/llvm/test/Transforms/MemCpyOpt/no-libcalls.ll b/llvm/test/Transforms/MemCpyOpt/no-libcalls.ll
new file mode 100644
index 0000000000000..c4d935158435c
--- /dev/null
+++ b/llvm/test/Transforms/MemCpyOpt/no-libcalls.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -memcpyopt < %s | FileCheck %s --check-prefixes=CHECK,LIBCALLS
+; RUN: opt -S -memcpyopt -mtriple=amdgcn-- < %s | FileCheck %s --check-prefixes=CHECK,NO-LIBCALLS
+
+; REQUIRES: amdgpu-registered-target
+
+define void @dont_create_memset(ptr %p) {
+; LIBCALLS-LABEL: @dont_create_memset(
+; LIBCALLS-NEXT:    [[P1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 1
+; LIBCALLS-NEXT:    [[P2:%.*]] = getelementptr i32, ptr [[P]], i64 2
+; LIBCALLS-NEXT:    [[P3:%.*]] = getelementptr i32, ptr [[P]], i64 3
+; LIBCALLS-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[P]], i8 0, i64 16, i1 false)
+; LIBCALLS-NEXT:    ret void
+;
+; NO-LIBCALLS-LABEL: @dont_create_memset(
+; NO-LIBCALLS-NEXT:    store i32 0, ptr [[P:%.*]], align 4
+; NO-LIBCALLS-NEXT:    [[P1:%.*]] = getelementptr i32, ptr [[P]], i64 1
+; NO-LIBCALLS-NEXT:    store i32 0, ptr [[P1]], align 4
+; NO-LIBCALLS-NEXT:    [[P2:%.*]] = getelementptr i32, ptr [[P]], i64 2
+; NO-LIBCALLS-NEXT:    store i32 0, ptr [[P2]], align 4
+; NO-LIBCALLS-NEXT:    [[P3:%.*]] = getelementptr i32, ptr [[P]], i64 3
+; NO-LIBCALLS-NEXT:    store i32 0, ptr [[P3]], align 4
+; NO-LIBCALLS-NEXT:    ret void
+;
+  store i32 0, ptr %p
+  %p1 = getelementptr i32, ptr %p, i64 1
+  store i32 0, ptr %p1
+  %p2 = getelementptr i32, ptr %p, i64 2
+  store i32 0, ptr %p2
+  %p3 = getelementptr i32, ptr %p, i64 3
+  store i32 0, ptr %p3
+  ret void
+}
+
+%ty = type { i64 }
+
+define void @dont_create_memcpy(ptr %p1, ptr %p2) {
+; LIBCALLS-LABEL: @dont_create_memcpy(
+; LIBCALLS-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 4 [[P2:%.*]], ptr align 4 [[P1:%.*]], i64 8, i1 false)
+; LIBCALLS-NEXT:    ret void
+;
+; NO-LIBCALLS-LABEL: @dont_create_memcpy(
+; NO-LIBCALLS-NEXT:    [[V:%.*]] = load [[TY:%.*]], ptr [[P1:%.*]], align 4
+; NO-LIBCALLS-NEXT:    store [[TY]] [[V]], ptr [[P2:%.*]], align 4
+; NO-LIBCALLS-NEXT:    ret void
+;
+  %v = load %ty, ptr %p1
+  store %ty %v, ptr %p2
+  ret void
+}
+
+define void @forward_memcpy(ptr noalias %p1, ptr noalias %p2, ptr noalias %p3) {
+; CHECK-LABEL: @forward_memcpy(
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[P2:%.*]], ptr [[P1:%.*]], i64 16, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[P3:%.*]], ptr [[P1]], i64 16, i1 false)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.memcpy.p0.p0.i64(ptr %p2, ptr %p1, i64 16, i1 false)
+  call void @llvm.memcpy.p0.p0.i64(ptr %p3, ptr %p2, i64 16, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)

