[llvm] [CGP] Consider arguments and ret values in `dupRetToEnableTailCallOpts` (PR #76613)

Antonio Frighetto via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 7 08:56:42 PST 2024


https://github.com/antoniofrighetto updated https://github.com/llvm/llvm-project/pull/76613

>From d8799422a52e9f119b18bf80d1aa3993533c066a Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Fri, 29 Dec 2023 00:21:12 +0100
Subject: [PATCH 1/2] [CGP] Precommit tests for PR76613 (NFC)

---
 llvm/test/CodeGen/X86/tailcall-cgp-dup.ll | 168 ++++++++++++++++++++++
 1 file changed, 168 insertions(+)

diff --git a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
index 75bbae1050d61c..cefcd4c0c3df91 100644
--- a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
+++ b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
@@ -184,3 +184,171 @@ return:
   %retval = phi ptr [ %ptr, %if.then ], [ %obj, %entry ]
   ret ptr %retval
 }
+
+define ptr @memset_tailc(ptr %ret_val, i64 %sz) nounwind {
+; CHECK-LABEL: memset_tailc:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    movq %rdi, %rbx
+; CHECK-NEXT:    testq %rdi, %rdi
+; CHECK-NEXT:    je LBB4_2
+; CHECK-NEXT:  ## %bb.1: ## %if.then
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    movq %rbx, %rdi
+; CHECK-NEXT:    xorl %esi, %esi
+; CHECK-NEXT:    callq _memset
+; CHECK-NEXT:  LBB4_2: ## %return
+; CHECK-NEXT:    movq %rbx, %rax
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    retq
+entry:
+  %cmp = icmp eq ptr %ret_val, null
+  br i1 %cmp, label %return, label %if.then
+
+if.then:
+  tail call void @llvm.memset.p0.i64(ptr nonnull align 1 %ret_val, i8 0, i64 %sz, i1 false)
+  br label %return
+
+return:
+  ret ptr %ret_val
+}
+
+define ptr @baz_not_to_tailc(ptr %ret_val, ptr %arg) nounwind {
+; CHECK-LABEL: baz_not_to_tailc:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    movq %rdi, %rbx
+; CHECK-NEXT:    testq %rdi, %rdi
+; CHECK-NEXT:    je LBB5_2
+; CHECK-NEXT:  ## %bb.1: ## %if.then
+; CHECK-NEXT:    movq %rbx, %rdi
+; CHECK-NEXT:    callq _baz
+; CHECK-NEXT:  LBB5_2: ## %return
+; CHECK-NEXT:    movq %rbx, %rax
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    retq
+entry:
+  %cmp = icmp eq ptr %ret_val, null
+  br i1 %cmp, label %return, label %if.then
+
+if.then:
+  %rv = tail call ptr @baz(ptr %ret_val, ptr %arg)
+  br label %return
+
+return:
+  ret ptr %ret_val
+}
+
+define ptr @memcpy_tailc(ptr %ret_val, i64 %sz, ptr %src) nounwind {
+; CHECK-LABEL: memcpy_tailc:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    testq %rsi, %rsi
+; CHECK-NEXT:    je LBB6_1
+; CHECK-NEXT:  ## %bb.2: ## %if.then
+; CHECK-NEXT:    movq %rsi, %rax
+; CHECK-NEXT:    movq %rdi, %rbx
+; CHECK-NEXT:    movq %rdx, %rsi
+; CHECK-NEXT:    movq %rax, %rdx
+; CHECK-NEXT:    callq _memcpy
+; CHECK-NEXT:    jmp LBB6_3
+; CHECK-NEXT:  LBB6_1:
+; CHECK-NEXT:    movq %rdx, %rbx
+; CHECK-NEXT:  LBB6_3: ## %return
+; CHECK-NEXT:    movq %rbx, %rax
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    retq
+entry:
+  %cmp = icmp eq i64 %sz, 0
+  br i1 %cmp, label %return, label %if.then
+
+if.then:
+  tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 %ret_val, ptr align 1 %src, i64 %sz, i1 false)
+  br label %return
+
+return:
+  %phi = phi ptr [ %ret_val, %if.then ], [ %src, %entry ]
+  ret ptr %phi
+}
+
+define ptr @strcpy_legal_and_baz_illegal(ptr %arg, i64 %sz, ptr %2) nounwind {
+; CHECK-LABEL: strcpy_legal_and_baz_illegal:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    movq %rdx, %r14
+; CHECK-NEXT:    movq %rsi, %r15
+; CHECK-NEXT:    movq %rdi, %rbx
+; CHECK-NEXT:    movq %rsi, %rdi
+; CHECK-NEXT:    callq _malloc
+; CHECK-NEXT:    testq %r15, %r15
+; CHECK-NEXT:    je LBB7_2
+; CHECK-NEXT:  ## %bb.1: ## %if.then
+; CHECK-NEXT:    movq %rax, %rdi
+; CHECK-NEXT:    movq %r14, %rsi
+; CHECK-NEXT:    movq %rax, %rbx
+; CHECK-NEXT:    callq _strcpy
+; CHECK-NEXT:    jmp LBB7_3
+; CHECK-NEXT:  LBB7_2: ## %if.else
+; CHECK-NEXT:    movq %rbx, %rdi
+; CHECK-NEXT:    movq %r14, %rsi
+; CHECK-NEXT:    callq _baz
+; CHECK-NEXT:  LBB7_3: ## %return
+; CHECK-NEXT:    movq %rbx, %rax
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    retq
+entry:
+  %strcpy_ret_val = tail call noalias ptr @malloc(i64 %sz)
+  %cmp = icmp eq i64 %sz, 0
+  br i1 %cmp, label %if.else, label %if.then
+
+if.then:
+  %rv_unused = tail call ptr @strcpy(ptr dereferenceable(1) %strcpy_ret_val, ptr dereferenceable(1) %2)
+  br label %return
+
+if.else:
+  %rv_unused_2 = tail call ptr @baz(ptr %arg, ptr %2)
+  br label %return
+
+return:
+  %phi = phi ptr [ %strcpy_ret_val, %if.then ], [ %arg, %if.else ]
+  ret ptr %phi
+}
+
+define ptr @memset_not_to_tailc(ptr %arg, i64 %sz, ptr %ret_val_1, ptr %ret_val_2) nounwind {
+; CHECK-LABEL: memset_not_to_tailc:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    movq %rdx, %rax
+; CHECK-NEXT:    testq %rsi, %rsi
+; CHECK-NEXT:    je LBB8_2
+; CHECK-NEXT:  ## %bb.1: ## %if.then
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    movq %rcx, %rbx
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    xorl %esi, %esi
+; CHECK-NEXT:    callq _memset
+; CHECK-NEXT:    movq %rbx, %rax
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:  LBB8_2: ## %return
+; CHECK-NEXT:    retq
+entry:
+  %cmp = icmp eq i64 %sz, 0
+  br i1 %cmp, label %return, label %if.then
+
+if.then:
+  tail call void @llvm.memset.p0.i64(ptr align 1 %arg, i8 0, i64 %sz, i1 false)
+  br label %return
+
+return:
+  %phi = phi ptr [ %ret_val_2, %if.then ], [ %ret_val_1, %entry ]
+  ret ptr %phi
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
+declare noalias ptr @malloc(i64)
+declare ptr @strcpy(ptr noalias returned writeonly, ptr noalias nocapture readonly)
+declare ptr @baz(ptr, ptr)

>From c31031660261394ddab8eb1af16e830347eb2d02 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Fri, 29 Dec 2023 00:25:23 +0100
Subject: [PATCH 2/2] [CGP] Consider arguments and ret values in
 `dupRetToEnableTailCallOpts`

Expose further tail call optimization opportunities when the returned
value being examined is either the result of a call instruction or a
function argument. Moreover, handle phi nodes whose incoming values are
not produced directly by a tail call, e.g. when the incoming value is the
first argument of a memset/memcpy/strcpy call whose result is unused.

Fixes: https://github.com/llvm/llvm-project/issues/75455.
---
 llvm/lib/CodeGen/CodeGenPrepare.cpp       | 70 +++++++++++++++++++++--
 llvm/test/CodeGen/X86/tailcall-cgp-dup.ll | 52 +++++++----------
 2 files changed, 86 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 5383b15c1c7f5d..e6c13faae06a7e 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2522,8 +2522,39 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
   return false;
 }
 
+static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo,
+                                          const CallInst *CI) {
+  assert(CI && CI->use_empty());
+
+  if (const auto *II = dyn_cast<IntrinsicInst>(CI))
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::memset:
+    case Intrinsic::memcpy:
+    case Intrinsic::memmove:
+      return true;
+    default:
+      return false;
+    }
+
+  LibFunc LF;
+  Function *Callee = CI->getCalledFunction();
+  if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
+    switch (LF) {
+    case LibFunc_strcpy:
+    case LibFunc_strncpy:
+    case LibFunc_strcat:
+    case LibFunc_strncat:
+      return true;
+    default:
+      return false;
+    }
+
+  return false;
+}
+
 /// Look for opportunities to duplicate return instructions to the predecessor
-/// to enable tail call optimizations. The case it is currently looking for is:
+/// to enable tail call optimizations. The cases it is currently looking for
+/// are simple returns of call values, function arguments, or phi nodes, e.g.:
 /// @code
 /// bb0:
 ///   %tmp0 = tail call i32 @f0()
@@ -2580,7 +2611,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
     }
 
     PN = dyn_cast<PHINode>(V);
-    if (!PN)
+    if (!PN && !isa<Argument>(V) && !isa<CallInst>(V))
       return false;
   }
 
@@ -2620,8 +2651,32 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
       // Make sure the phi value is indeed produced by the tail call.
       if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
           TLI->mayBeEmittedAsTailCall(CI) &&
-          attributesPermitTailCall(F, CI, RetI, *TLI))
+          attributesPermitTailCall(F, CI, RetI, *TLI)) {
         TailCallBBs.push_back(PredBB);
+      } else {
+        /// Consider the cases in which the phi value is indirectly produced by
+        /// the tail call, for example when encountering memset(), memmove(),
+        /// strcpy(), whose return value may have been optimized out. In such
+        /// cases, the value needs to be the first argument of that call.
+        /// @code
+        /// bb0:
+        ///   tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
+        ///   br label %return
+        /// return:
+        ///   %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
+        /// @endcode
+        if (PredBB && PredBB->getSingleSuccessor() == BB)
+          CI = dyn_cast_or_null<CallInst>(
+              PredBB->getTerminator()->getPrevNonDebugInstruction(true));
+
+        if (CI && CI->use_empty() &&
+            isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
+            CI->hasArgument(IncomingVal) &&
+            IncomingVal == CI->getArgOperand(0) &&
+            TLI->mayBeEmittedAsTailCall(CI) &&
+            attributesPermitTailCall(F, CI, RetI, *TLI))
+          TailCallBBs.push_back(PredBB);
+      }
     }
   } else {
     SmallPtrSet<BasicBlock *, 4> VisitedBBs;
@@ -2631,8 +2686,13 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
       if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) {
         CallInst *CI = dyn_cast<CallInst>(I);
         if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
-            attributesPermitTailCall(F, CI, RetI, *TLI))
-          TailCallBBs.push_back(Pred);
+            attributesPermitTailCall(F, CI, RetI, *TLI)) {
+          bool RVIsArgOrCI =
+              isa_and_nonnull<Argument>(V) || isa_and_nonnull<CallInst>(V);
+          if (!RVIsArgOrCI ||
+              (RVIsArgOrCI && isIntrinsicOrLFToBeTailCalled(TLInfo, CI)))
+            TailCallBBs.push_back(Pred);
+        }
       }
     }
   }
diff --git a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
index cefcd4c0c3df91..488e503b5c7c9c 100644
--- a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
+++ b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
@@ -188,18 +188,14 @@ return:
 define ptr @memset_tailc(ptr %ret_val, i64 %sz) nounwind {
 ; CHECK-LABEL: memset_tailc:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    movq %rdi, %rbx
 ; CHECK-NEXT:    testq %rdi, %rdi
-; CHECK-NEXT:    je LBB4_2
-; CHECK-NEXT:  ## %bb.1: ## %if.then
+; CHECK-NEXT:    je LBB4_1
+; CHECK-NEXT:  ## %bb.2: ## %if.then
 ; CHECK-NEXT:    movq %rsi, %rdx
-; CHECK-NEXT:    movq %rbx, %rdi
 ; CHECK-NEXT:    xorl %esi, %esi
-; CHECK-NEXT:    callq _memset
-; CHECK-NEXT:  LBB4_2: ## %return
-; CHECK-NEXT:    movq %rbx, %rax
-; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    jmp _memset ## TAILCALL
+; CHECK-NEXT:  LBB4_1: ## %return
+; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    retq
 entry:
   %cmp = icmp eq ptr %ret_val, null
@@ -242,21 +238,15 @@ return:
 define ptr @memcpy_tailc(ptr %ret_val, i64 %sz, ptr %src) nounwind {
 ; CHECK-LABEL: memcpy_tailc:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    testq %rsi, %rsi
 ; CHECK-NEXT:    je LBB6_1
 ; CHECK-NEXT:  ## %bb.2: ## %if.then
 ; CHECK-NEXT:    movq %rsi, %rax
-; CHECK-NEXT:    movq %rdi, %rbx
 ; CHECK-NEXT:    movq %rdx, %rsi
 ; CHECK-NEXT:    movq %rax, %rdx
-; CHECK-NEXT:    callq _memcpy
-; CHECK-NEXT:    jmp LBB6_3
-; CHECK-NEXT:  LBB6_1:
-; CHECK-NEXT:    movq %rdx, %rbx
-; CHECK-NEXT:  LBB6_3: ## %return
-; CHECK-NEXT:    movq %rbx, %rax
-; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    jmp _memcpy ## TAILCALL
+; CHECK-NEXT:  LBB6_1: ## %return
+; CHECK-NEXT:    movq %rdx, %rax
 ; CHECK-NEXT:    retq
 entry:
   %cmp = icmp eq i64 %sz, 0
@@ -277,25 +267,25 @@ define ptr @strcpy_legal_and_baz_illegal(ptr %arg, i64 %sz, ptr %2) nounwind {
 ; CHECK-NEXT:    pushq %r15
 ; CHECK-NEXT:    pushq %r14
 ; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    movq %rdx, %r14
+; CHECK-NEXT:    movq %rdx, %rbx
 ; CHECK-NEXT:    movq %rsi, %r15
-; CHECK-NEXT:    movq %rdi, %rbx
+; CHECK-NEXT:    movq %rdi, %r14
 ; CHECK-NEXT:    movq %rsi, %rdi
 ; CHECK-NEXT:    callq _malloc
 ; CHECK-NEXT:    testq %r15, %r15
-; CHECK-NEXT:    je LBB7_2
-; CHECK-NEXT:  ## %bb.1: ## %if.then
+; CHECK-NEXT:    je LBB7_1
+; CHECK-NEXT:  ## %bb.2: ## %if.then
 ; CHECK-NEXT:    movq %rax, %rdi
-; CHECK-NEXT:    movq %r14, %rsi
-; CHECK-NEXT:    movq %rax, %rbx
-; CHECK-NEXT:    callq _strcpy
-; CHECK-NEXT:    jmp LBB7_3
-; CHECK-NEXT:  LBB7_2: ## %if.else
-; CHECK-NEXT:    movq %rbx, %rdi
-; CHECK-NEXT:    movq %r14, %rsi
+; CHECK-NEXT:    movq %rbx, %rsi
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    jmp _strcpy ## TAILCALL
+; CHECK-NEXT:  LBB7_1: ## %if.else
+; CHECK-NEXT:    movq %r14, %rdi
+; CHECK-NEXT:    movq %rbx, %rsi
 ; CHECK-NEXT:    callq _baz
-; CHECK-NEXT:  LBB7_3: ## %return
-; CHECK-NEXT:    movq %rbx, %rax
+; CHECK-NEXT:    movq %r14, %rax
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r14
 ; CHECK-NEXT:    popq %r15



More information about the llvm-commits mailing list