[llvm] [CGP] Consider arguments and ret values in `dupRetToEnableTailCallOpts` (PR #76613)
Antonio Frighetto via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 11 22:07:27 PST 2024
https://github.com/antoniofrighetto updated https://github.com/llvm/llvm-project/pull/76613
>From 36579e9ad578f0f93c2a671123f8e42f15db68b2 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Fri, 29 Dec 2023 00:21:12 +0100
Subject: [PATCH 1/2] [CGP] Precommit tests for PR76613 (NFC)
---
llvm/test/CodeGen/X86/tailcall-cgp-dup.ll | 193 ++++++++++++++++++++++
1 file changed, 193 insertions(+)
diff --git a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
index 75bbae1050d61c..c48087da500e0e 100644
--- a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
+++ b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
@@ -184,3 +184,196 @@ return:
%retval = phi ptr [ %ptr, %if.then ], [ %obj, %entry ]
ret ptr %retval
}
+
+define ptr @memset_tailc(ptr %ret_val, i64 %sz) nounwind {
+; CHECK-LABEL: memset_tailc:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: movq %rdi, %rbx
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: je LBB4_2
+; CHECK-NEXT: ## %bb.1: ## %if.then
+; CHECK-NEXT: movq %rsi, %rdx
+; CHECK-NEXT: movq %rbx, %rdi
+; CHECK-NEXT: xorl %esi, %esi
+; CHECK-NEXT: callq _memset
+; CHECK-NEXT: LBB4_2: ## %return
+; CHECK-NEXT: movq %rbx, %rax
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: retq
+entry:
+ %cmp = icmp eq ptr %ret_val, null
+ br i1 %cmp, label %return, label %if.then
+
+if.then:
+ tail call void @llvm.memset.p0.i64(ptr nonnull align 1 %ret_val, i8 0, i64 %sz, i1 false)
+ br label %return
+
+return:
+ ret ptr %ret_val
+}
+
+define ptr @memcpy_tailc(ptr %ret_val, i64 %sz, ptr %src) nounwind {
+; CHECK-LABEL: memcpy_tailc:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: testq %rsi, %rsi
+; CHECK-NEXT: je LBB5_1
+; CHECK-NEXT: ## %bb.2: ## %if.then
+; CHECK-NEXT: movq %rsi, %rax
+; CHECK-NEXT: movq %rdi, %rbx
+; CHECK-NEXT: movq %rdx, %rsi
+; CHECK-NEXT: movq %rax, %rdx
+; CHECK-NEXT: callq _memcpy
+; CHECK-NEXT: jmp LBB5_3
+; CHECK-NEXT: LBB5_1:
+; CHECK-NEXT: movq %rdx, %rbx
+; CHECK-NEXT: LBB5_3: ## %return
+; CHECK-NEXT: movq %rbx, %rax
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: retq
+entry:
+ %cmp = icmp eq i64 %sz, 0
+ br i1 %cmp, label %return, label %if.then
+
+if.then:
+ tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 %ret_val, ptr align 1 %src, i64 %sz, i1 false)
+ br label %return
+
+return:
+ %phi = phi ptr [ %ret_val, %if.then ], [ %src, %entry ]
+ ret ptr %phi
+}
+
+define ptr @strcpy_legal_and_baz_illegal(ptr %arg, i64 %sz, ptr %2) nounwind {
+; CHECK-LABEL: strcpy_legal_and_baz_illegal:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: movq %rdx, %r14
+; CHECK-NEXT: movq %rsi, %r15
+; CHECK-NEXT: movq %rdi, %rbx
+; CHECK-NEXT: movq %rsi, %rdi
+; CHECK-NEXT: callq _malloc
+; CHECK-NEXT: testq %r15, %r15
+; CHECK-NEXT: je LBB6_2
+; CHECK-NEXT: ## %bb.1: ## %if.then
+; CHECK-NEXT: movq %rax, %rdi
+; CHECK-NEXT: movq %r14, %rsi
+; CHECK-NEXT: movq %rax, %rbx
+; CHECK-NEXT: callq _strcpy
+; CHECK-NEXT: jmp LBB6_3
+; CHECK-NEXT: LBB6_2: ## %if.else
+; CHECK-NEXT: movq %rbx, %rdi
+; CHECK-NEXT: movq %r14, %rsi
+; CHECK-NEXT: callq _baz
+; CHECK-NEXT: LBB6_3: ## %return
+; CHECK-NEXT: movq %rbx, %rax
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: retq
+entry:
+ %strcpy_ret_val = tail call noalias ptr @malloc(i64 %sz)
+ %cmp = icmp eq i64 %sz, 0
+ br i1 %cmp, label %if.else, label %if.then
+
+if.then:
+ %rv_unused = tail call ptr @strcpy(ptr dereferenceable(1) %strcpy_ret_val, ptr dereferenceable(1) %2)
+ br label %return
+
+if.else:
+ %rv_unused_2 = tail call ptr @baz(ptr %arg, ptr %2)
+ br label %return
+
+return:
+ %phi = phi ptr [ %strcpy_ret_val, %if.then ], [ %arg, %if.else ]
+ ret ptr %phi
+}
+
+define ptr @baz_illegal_tailc(ptr %ret_val, ptr %arg) nounwind {
+; CHECK-LABEL: baz_illegal_tailc:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: movq %rdi, %rbx
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: je LBB7_2
+; CHECK-NEXT: ## %bb.1: ## %if.then
+; CHECK-NEXT: movq %rbx, %rdi
+; CHECK-NEXT: callq _baz
+; CHECK-NEXT: LBB7_2: ## %return
+; CHECK-NEXT: movq %rbx, %rax
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: retq
+entry:
+ %cmp = icmp eq ptr %ret_val, null
+ br i1 %cmp, label %return, label %if.then
+
+if.then:
+ %rv = tail call ptr @baz(ptr %ret_val, ptr %arg)
+ br label %return
+
+return:
+ ret ptr %ret_val
+}
+
+define ptr @memset_illegal_tailc(ptr %arg, i64 %sz, ptr %ret_val_1, ptr %ret_val_2) nounwind {
+; CHECK-LABEL: memset_illegal_tailc:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: movq %rdx, %rax
+; CHECK-NEXT: testq %rsi, %rsi
+; CHECK-NEXT: je LBB8_2
+; CHECK-NEXT: ## %bb.1: ## %if.then
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: movq %rcx, %rbx
+; CHECK-NEXT: movq %rsi, %rdx
+; CHECK-NEXT: xorl %esi, %esi
+; CHECK-NEXT: callq _memset
+; CHECK-NEXT: movq %rbx, %rax
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: LBB8_2: ## %return
+; CHECK-NEXT: retq
+entry:
+ %cmp = icmp eq i64 %sz, 0
+ br i1 %cmp, label %return, label %if.then
+
+if.then:
+ tail call void @llvm.memset.p0.i64(ptr align 1 %arg, i8 0, i64 %sz, i1 false)
+ br label %return
+
+return:
+ %phi = phi ptr [ %ret_val_2, %if.then ], [ %ret_val_1, %entry ]
+ ret ptr %phi
+}
+
+define ptr @strcpy_illegal_tailc(ptr %dest, i64 %sz, ptr readonly returned %src) nounwind {
+; CHECK-LABEL: strcpy_illegal_tailc:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: movq %rdx, %rbx
+; CHECK-NEXT: testq %rsi, %rsi
+; CHECK-NEXT: je LBB9_2
+; CHECK-NEXT: ## %bb.1: ## %if.then
+; CHECK-NEXT: movq %rbx, %rsi
+; CHECK-NEXT: callq _strcpy
+; CHECK-NEXT: LBB9_2: ## %return
+; CHECK-NEXT: movq %rbx, %rax
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: retq
+ %cmp = icmp eq i64 %sz, 0
+ br i1 %cmp, label %return, label %if.then
+
+if.then:
+ %6 = tail call ptr @strcpy(ptr dereferenceable(1) %dest, ptr dereferenceable(1) %src)
+ br label %return
+
+return:
+ ret ptr %src
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1)
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1)
+declare noalias ptr @malloc(i64)
+declare ptr @strcpy(ptr noalias returned writeonly, ptr noalias nocapture readonly)
+declare ptr @baz(ptr, ptr)
>From 8df3ae3695111a7a981b5f52ae3b8feba0e311b4 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Fri, 29 Dec 2023 00:25:23 +0100
Subject: [PATCH 2/2] [CGP] Extend `dupRetToEnableTailCallOpts` to known
intrinsics
Hint further tail call optimization opportunities when the examined
returned value is the return value of a known intrinsic or library
function, and it appears as the first function argument.
Fixes: https://github.com/llvm/llvm-project/issues/75455.
---
llvm/lib/CodeGen/CodeGenPrepare.cpp | 70 +++++++++++++++++++++--
llvm/test/CodeGen/X86/tailcall-cgp-dup.ll | 52 +++++++----------
2 files changed, 85 insertions(+), 37 deletions(-)
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 09c4922d8822cc..32a25b49b4e4b0 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2522,8 +2522,40 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
return false;
}
+static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo,
+ const CallInst *CI) {
+ assert(CI && CI->use_empty());
+
+ if (const auto *II = dyn_cast<IntrinsicInst>(CI))
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::memset:
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ return true;
+ default:
+ return false;
+ }
+
+ LibFunc LF;
+ Function *Callee = CI->getCalledFunction();
+ if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
+ switch (LF) {
+ case LibFunc_strcpy:
+ case LibFunc_strncpy:
+ case LibFunc_strcat:
+ case LibFunc_strncat:
+ return true;
+ default:
+ return false;
+ }
+
+ return false;
+}
+
/// Look for opportunities to duplicate return instructions to the predecessor
-/// to enable tail call optimizations. The case it is currently looking for is:
+/// to enable tail call optimizations. The case it is currently looking for is
+/// the following one. Known intrinsics or library functions that may be tail
+/// called are taken into account as well.
/// @code
/// bb0:
/// %tmp0 = tail call i32 @f0()
@@ -2580,8 +2612,6 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
}
PN = dyn_cast<PHINode>(V);
- if (!PN)
- return false;
}
if (PN && PN->getParent() != BB)
@@ -2620,8 +2650,30 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
// Make sure the phi value is indeed produced by the tail call.
if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
TLI->mayBeEmittedAsTailCall(CI) &&
- attributesPermitTailCall(F, CI, RetI, *TLI))
+ attributesPermitTailCall(F, CI, RetI, *TLI)) {
TailCallBBs.push_back(PredBB);
+ } else {
+ // Consider the cases in which the phi value is indirectly produced by
+ // the tail call, for example when encountering memset(), memmove(),
+ // strcpy(), whose return value may have been optimized out. In such
+ // cases, the value needs to be the first function argument.
+ //
+ // bb0:
+ // tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
+ // br label %return
+ // return:
+ // %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
+ if (PredBB && PredBB->getSingleSuccessor() == BB)
+ CI = dyn_cast_or_null<CallInst>(
+ PredBB->getTerminator()->getPrevNonDebugInstruction(true));
+
+ if (CI && CI->use_empty() &&
+ isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
+ IncomingVal == CI->getArgOperand(0) &&
+ TLI->mayBeEmittedAsTailCall(CI) &&
+ attributesPermitTailCall(F, CI, RetI, *TLI))
+ TailCallBBs.push_back(PredBB);
+ }
}
} else {
SmallPtrSet<BasicBlock *, 4> VisitedBBs;
@@ -2631,8 +2683,14 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) {
CallInst *CI = dyn_cast<CallInst>(I);
if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
- attributesPermitTailCall(F, CI, RetI, *TLI))
- TailCallBBs.push_back(Pred);
+ attributesPermitTailCall(F, CI, RetI, *TLI)) {
+ // Either we return void or the return value must be the first
+ // argument of a known intrinsic or library function.
+ if (!V || (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
+ V == CI->getArgOperand(0))) {
+ TailCallBBs.push_back(Pred);
+ }
+ }
}
}
}
diff --git a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
index c48087da500e0e..401ed9f7bc5a9e 100644
--- a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
+++ b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
@@ -188,18 +188,14 @@ return:
define ptr @memset_tailc(ptr %ret_val, i64 %sz) nounwind {
; CHECK-LABEL: memset_tailc:
; CHECK: ## %bb.0: ## %entry
-; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: testq %rdi, %rdi
-; CHECK-NEXT: je LBB4_2
-; CHECK-NEXT: ## %bb.1: ## %if.then
+; CHECK-NEXT: je LBB4_1
+; CHECK-NEXT: ## %bb.2: ## %if.then
; CHECK-NEXT: movq %rsi, %rdx
-; CHECK-NEXT: movq %rbx, %rdi
; CHECK-NEXT: xorl %esi, %esi
-; CHECK-NEXT: callq _memset
-; CHECK-NEXT: LBB4_2: ## %return
-; CHECK-NEXT: movq %rbx, %rax
-; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: jmp _memset ## TAILCALL
+; CHECK-NEXT: LBB4_1: ## %return
+; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: retq
entry:
%cmp = icmp eq ptr %ret_val, null
@@ -216,21 +212,15 @@ return:
define ptr @memcpy_tailc(ptr %ret_val, i64 %sz, ptr %src) nounwind {
; CHECK-LABEL: memcpy_tailc:
; CHECK: ## %bb.0: ## %entry
-; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: testq %rsi, %rsi
; CHECK-NEXT: je LBB5_1
; CHECK-NEXT: ## %bb.2: ## %if.then
; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: movq %rdx, %rsi
; CHECK-NEXT: movq %rax, %rdx
-; CHECK-NEXT: callq _memcpy
-; CHECK-NEXT: jmp LBB5_3
-; CHECK-NEXT: LBB5_1:
-; CHECK-NEXT: movq %rdx, %rbx
-; CHECK-NEXT: LBB5_3: ## %return
-; CHECK-NEXT: movq %rbx, %rax
-; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: jmp _memcpy ## TAILCALL
+; CHECK-NEXT: LBB5_1: ## %return
+; CHECK-NEXT: movq %rdx, %rax
; CHECK-NEXT: retq
entry:
%cmp = icmp eq i64 %sz, 0
@@ -251,25 +241,25 @@ define ptr @strcpy_legal_and_baz_illegal(ptr %arg, i64 %sz, ptr %2) nounwind {
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: movq %rdx, %r14
+; CHECK-NEXT: movq %rdx, %rbx
; CHECK-NEXT: movq %rsi, %r15
-; CHECK-NEXT: movq %rdi, %rbx
+; CHECK-NEXT: movq %rdi, %r14
; CHECK-NEXT: movq %rsi, %rdi
; CHECK-NEXT: callq _malloc
; CHECK-NEXT: testq %r15, %r15
-; CHECK-NEXT: je LBB6_2
-; CHECK-NEXT: ## %bb.1: ## %if.then
+; CHECK-NEXT: je LBB6_1
+; CHECK-NEXT: ## %bb.2: ## %if.then
; CHECK-NEXT: movq %rax, %rdi
-; CHECK-NEXT: movq %r14, %rsi
-; CHECK-NEXT: movq %rax, %rbx
-; CHECK-NEXT: callq _strcpy
-; CHECK-NEXT: jmp LBB6_3
-; CHECK-NEXT: LBB6_2: ## %if.else
-; CHECK-NEXT: movq %rbx, %rdi
-; CHECK-NEXT: movq %r14, %rsi
+; CHECK-NEXT: movq %rbx, %rsi
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: jmp _strcpy ## TAILCALL
+; CHECK-NEXT: LBB6_1: ## %if.else
+; CHECK-NEXT: movq %r14, %rdi
+; CHECK-NEXT: movq %rbx, %rsi
; CHECK-NEXT: callq _baz
-; CHECK-NEXT: LBB6_3: ## %return
-; CHECK-NEXT: movq %rbx, %rax
+; CHECK-NEXT: movq %r14, %rax
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %r15
More information about the llvm-commits
mailing list