[PATCH] D86020: [MemCpyOptimizer] Optimize passing byref function arguments down the stack

Anatoly Trosinenko via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Sat Aug 15 09:52:13 PDT 2020


atrosinenko created this revision.
atrosinenko added reviewers: arsenm, jdoerfert, rjmccall, rampitec.
Herald added subscribers: cfe-commits, hiraditya.
Herald added projects: clang, LLVM.
atrosinenko requested review of this revision.
Herald added a subscriber: wdng.

When a byref function argument is passed through like this:

  void leaf(struct X);
  void middle(struct X a) {
    leaf(a);
  }

... an unnecessary temporary copy may be introduced inside middle().

This patch makes that copy being eliminated by MemCpyOptimizer unless size of
the byref argument is 8 bytes or less (in that case memcpy() is eliminated by
InstCombine first preventing later optimization).


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D86020

Files:
  clang/test/CodeGen/msp430-struct-or-union-args.c
  llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
  llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp


Index: llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1217,10 +1217,10 @@
   return true;
 }
 
-/// This is called on every byval argument in call sites.
-bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
+/// This is called on every byval/byref argument in call sites.
+bool MemCpyOptPass::processByValOrByRefArgument(CallBase &CB, unsigned ArgNo) {
   const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
-  // Find out what feeds this byval argument.
+  // Find out what feeds this byval/byref argument.
   Value *ByValArg = CB.getArgOperand(ArgNo);
   Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
   uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
@@ -1287,7 +1287,7 @@
     TmpCast = TmpBitCast;
   }
 
-  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
+  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval or byref:\n"
                     << "  " << *MDep << "\n"
                     << "  " << CB << "\n");
 
@@ -1328,8 +1328,8 @@
         RepeatInstruction = processMemMove(M);
       else if (auto *CB = dyn_cast<CallBase>(I)) {
         for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
-          if (CB->isByValArgument(i))
-            MadeChange |= processByValArgument(*CB, i);
+          if (CB->isByValArgument(i) || CB->paramHasAttr(i, Attribute::ByRef))
+            MadeChange |= processByValOrByRefArgument(*CB, i);
       }
 
       // Reprocess the instruction if desired.
Index: llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
===================================================================
--- llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -65,7 +65,7 @@
   bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep);
   bool processMemSetMemCpyDependence(MemCpyInst *MemCpy, MemSetInst *MemSet);
   bool performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, MemSetInst *MemSet);
-  bool processByValArgument(CallBase &CB, unsigned ArgNo);
+  bool processByValOrByRefArgument(CallBase &CB, unsigned ArgNo);
   Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
                                     Value *ByteVal);
 
Index: clang/test/CodeGen/msp430-struct-or-union-args.c
===================================================================
--- clang/test/CodeGen/msp430-struct-or-union-args.c
+++ clang/test/CodeGen/msp430-struct-or-union-args.c
@@ -1,5 +1,7 @@
 // REQUIRES: msp430-registered-target
-// RUN: %clang -target msp430 -fno-inline-functions -S -o- %s | FileCheck --check-prefixes=ASM %s
+// Optimized to check that some of memcpy intrinsic invocations are optimized out.
+// RUN: %clang -target msp430 -fno-inline-functions -S -Os -o- %s | FileCheck --check-prefixes=ASM %s
+// Do not use any optimization to not clutter the output with deduced LLVM IR attributes.
 // RUN: %clang -target msp430 -fno-inline-functions -S -emit-llvm -o- %s | FileCheck --check-prefixes=IR %s
 
 #include <limits.h>
@@ -89,3 +91,33 @@
 // ASM-NEXT: ret
   middle_u(u);
 }
+
+// No need to create a temporary copy of the struct/union-typed argument
+// if it is just passed to other function as-is.
+// TODO For now, it works when size of structure is more than 8 bytes,
+// otherwise the memcpy intrinsic will be replaced by InstCombiner.
+
+struct LL {
+  long long a[2];
+};
+
+extern struct LL ll;
+
+extern void leaf(struct LL x);
+
+void middle(struct LL x) {
+// ASM:      middle:
+// No stack-allocated objects:
+// ASM-NOT:  r1
+// ASM: call #leaf
+// ASM-NEXT: ret
+  leaf(x);
+}
+
+void caller(void) {
+// ASM:      caller:
+// ASM:      mov #ll, r12
+// ASM-NEXT: call #middle
+// ASM-NEXT: ret
+  middle(ll);
+}


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D86020.285847.patch
Type: text/x-patch
Size: 3926 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20200815/058013df/attachment.bin>


More information about the cfe-commits mailing list