[llvm] [CGP] Permit tail call optimization on undefined return value (PR #82419)

via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 20 13:17:41 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Antonio Frighetto (antoniofrighetto)

<details>
<summary>Changes</summary>

We should be able to freely allow tail call optimization on undefined values as well.

Fixes: https://github.com/llvm/llvm-project/issues/82387.

---
Full diff: https://github.com/llvm/llvm-project/pull/82419.diff


2 Files Affected:

- (modified) llvm/lib/CodeGen/CodeGenPrepare.cpp (+3-2) 
- (modified) llvm/test/CodeGen/X86/tailcall-cgp-dup.ll (+22) 


``````````diff
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 4036f18dbc6794..feefe87f406365 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2686,8 +2686,9 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
             attributesPermitTailCall(F, CI, RetI, *TLI)) {
           // Either we return void or the return value must be the first
           // argument of a known intrinsic or library function.
-          if (!V || (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
-                     V == CI->getArgOperand(0))) {
+          if (!V || isa<UndefValue>(V) ||
+              (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
+               V == CI->getArgOperand(0))) {
             TailCallBBs.push_back(Pred);
           }
         }
diff --git a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
index 401ed9f7bc5a9e..92811c87f5623f 100644
--- a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
+++ b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
@@ -362,8 +362,30 @@ return:
   ret ptr %src
 }
 
+@i = global i32 0, align 4
+
+define i32 @undef_tailc() nounwind {
+; CHECK-LABEL: undef_tailc:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    cmpl $0, _i(%rip)
+; CHECK-NEXT:    jne _qux ## TAILCALL
+; CHECK-NEXT:  ## %bb.1:
+; CHECK-NEXT:    retq
+  %1 = load i32, ptr @i, align 4
+  %2 = icmp eq i32 %1, 0
+  br i1 %2, label %5, label %3
+
+3:
+  %4 = tail call i32 @qux()
+  br label %5
+
+5:
+  ret i32 undef
+}
+
 declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1)
 declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1)
 declare noalias ptr @malloc(i64)
 declare ptr @strcpy(ptr noalias returned writeonly, ptr noalias nocapture readonly)
 declare ptr @baz(ptr, ptr)
+declare i32 @qux()

``````````

</details>


https://github.com/llvm/llvm-project/pull/82419


More information about the llvm-commits mailing list