[PATCH] D59078: memcpy is not tailcalled

Ramakota Reddy via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 28 03:00:08 PDT 2019


ramred01 updated this revision to Diff 192592.
ramred01 edited the summary of this revision.
ramred01 added a comment.
Herald added a subscriber: javed.absar.

Updated the summary to properly reflect the issue and the solution.

Added a test case.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59078/new/

https://reviews.llvm.org/D59078

Files:
  lib/CodeGen/Analysis.cpp
  test/CodeGen/AArch64/memcpy.ll


Index: test/CodeGen/AArch64/memcpy.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/memcpy.ll
@@ -0,0 +1,30 @@
+;RUN: llc %s -o - -verify-machineinstrs | FileCheck %s
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-arm-none-eabi"
+
+; Function Attrs: minsize nounwind optsize
+;CHECK-LABEL: @wmemcpy
+;CHECK: lsl
+;CHECK-NOT: bl
+;CHECK-NOT: mov
+;CHECK-NOT: ldp
+;CHECK-NEXT: b memcpy
+define dso_local i32* @wmemcpy(i32* returned, i32* nocapture readonly, i64) local_unnamed_addr #0 {
+  %4 = bitcast i32* %0 to i8*
+  %5 = bitcast i32* %1 to i8*
+  %6 = shl i64 %2, 2
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %4, i8* align 4 %5, i64 %6, i1 false)
+  ret i32* %0
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
+
+attributes #0 = { minsize nounwind optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://git.llvm.org/git/clang.git/ b8120e9866e32a7748b963f3f5f7759351b47f51) (https://git.llvm.org/git/llvm.git/ 18fe2c5936a77484261539c570fecd6be33c476c)"}
Index: lib/CodeGen/Analysis.cpp
===================================================================
--- lib/CodeGen/Analysis.cpp
+++ lib/CodeGen/Analysis.cpp
@@ -590,6 +590,7 @@
   // argument. On other platforms like arm-none-eabi, memcpy may be
   // expanded as library call without return value, like __aeabi_memcpy.
   const CallInst *Call = cast<CallInst>(I);
+  auto *BitCastIn = dyn_cast<BitCastInst>(Call->getArgOperand(0));
   if (Function *F = Call->getCalledFunction()) {
     Intrinsic::ID IID = F->getIntrinsicID();
     if (((IID == Intrinsic::memcpy &&
@@ -598,7 +599,10 @@
           TLI.getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove")) ||
          (IID == Intrinsic::memset &&
           TLI.getLibcallName(RTLIB::MEMSET) == StringRef("memset"))) &&
-        RetVal == Call->getArgOperand(0))
+        (RetVal == Call->getArgOperand(0) || 
+         (BitCastIn && RetVal == BitCastIn->getOperand(0) && 
+          Call->getArgOperand(0)->getType()->isPointerTy() &&
+          BitCastIn->getOperand(0)->getType()->isPointerTy())))
       return true;
   }
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D59078.192592.patch
Type: text/x-patch
Size: 2863 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190328/165138d4/attachment.bin>


More information about the llvm-commits mailing list