[llvm] [CGP] Permit tail call optimization on undefined return value (PR #82419)
Antonio Frighetto via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 22 01:09:51 PST 2024
https://github.com/antoniofrighetto updated https://github.com/llvm/llvm-project/pull/82419
>From 25e7e8d993f12f391ad90d23b5c3e2385ebafc81 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Tue, 20 Feb 2024 22:13:46 +0100
Subject: [PATCH] [CGP] Permit tail call optimization on undefined return value
We may freely allow tail call optimizations on undef values as well.
Fixes: https://github.com/llvm/llvm-project/issues/82387.
---
llvm/lib/CodeGen/CodeGenPrepare.cpp | 5 +-
llvm/test/CodeGen/AArch64/addsub.ll | 6 +-
.../CodeGen/AArch64/callbr-asm-obj-file.ll | 2 +-
llvm/test/CodeGen/RISCV/pr51206.ll | 12 ++--
llvm/test/CodeGen/X86/tailcall-cgp-dup.ll | 58 ++++++++++++++++++-
5 files changed, 66 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 4036f18dbc6794..feefe87f406365 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2686,8 +2686,9 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
attributesPermitTailCall(F, CI, RetI, *TLI)) {
// Either we return void or the return value must be the first
// argument of a known intrinsic or library function.
- if (!V || (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
- V == CI->getArgOperand(0))) {
+ if (!V || isa<UndefValue>(V) ||
+ (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
+ V == CI->getArgOperand(0))) {
TailCallBBs.push_back(Pred);
}
}
diff --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll
index 1b86fe6c707c8e..20215fe9146924 100644
--- a/llvm/test/CodeGen/AArch64/addsub.ll
+++ b/llvm/test/CodeGen/AArch64/addsub.ll
@@ -662,17 +662,13 @@ define dso_local i32 @_extract_crng_crng() {
; CHECK-NEXT: cmn x8, #1272
; CHECK-NEXT: b.pl .LBB36_3
; CHECK-NEXT: .LBB36_2: // %if.then
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: adrp x8, primary_crng
; CHECK-NEXT: ldr w8, [x8, :lo12:primary_crng]
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: adrp x8, input_pool
; CHECK-NEXT: add x8, x8, :lo12:input_pool
; CHECK-NEXT: csel x0, xzr, x8, eq
-; CHECK-NEXT: bl crng_reseed
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: b crng_reseed
; CHECK-NEXT: .LBB36_3: // %if.end
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/callbr-asm-obj-file.ll b/llvm/test/CodeGen/AArch64/callbr-asm-obj-file.ll
index 94041bf00218ca..e601f03d524a4a 100644
--- a/llvm/test/CodeGen/AArch64/callbr-asm-obj-file.ll
+++ b/llvm/test/CodeGen/AArch64/callbr-asm-obj-file.ll
@@ -40,7 +40,7 @@ declare dso_local i32 @g(...) local_unnamed_addr
declare dso_local i32 @i(...) local_unnamed_addr
; CHECK-LABEL: <test2>:
-; CHECK: bl {{.*}} <test2+0x18>
+; CHECK: b {{.*}} <test2+0x1c>
; CHECK-LABEL: <$d.5>:
; CHECK-LABEL: <$x.6>:
; CHECK-NEXT: b {{.*}} <test2+0x18>
diff --git a/llvm/test/CodeGen/RISCV/pr51206.ll b/llvm/test/CodeGen/RISCV/pr51206.ll
index f54031af0de5e6..8aa145f6ac5efa 100644
--- a/llvm/test/CodeGen/RISCV/pr51206.ll
+++ b/llvm/test/CodeGen/RISCV/pr51206.ll
@@ -27,16 +27,12 @@ define signext i32 @wobble() nounwind {
; CHECK-NEXT: lui a2, %hi(global.3)
; CHECK-NEXT: li a3, 5
; CHECK-NEXT: sw a1, %lo(global.3)(a2)
-; CHECK-NEXT: bltu a0, a3, .LBB0_2
-; CHECK-NEXT: # %bb.1: # %bb10
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT: call quux
-; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .LBB0_2: # %bb12
+; CHECK-NEXT: bgeu a0, a3, .LBB0_2
+; CHECK-NEXT: # %bb.1: # %bb12
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB0_2: # %bb10
+; CHECK-NEXT: tail quux
bb:
%tmp = load i8, ptr @global, align 1
%tmp1 = zext i8 %tmp to i32
diff --git a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
index 401ed9f7bc5a9e..8a9ee60f341c2b 100644
--- a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
+++ b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
@@ -339,7 +339,7 @@ return:
define ptr @strcpy_illegal_tailc(ptr %dest, i64 %sz, ptr readonly returned %src) nounwind {
; CHECK-LABEL: strcpy_illegal_tailc:
-; CHECK: ## %bb.0:
+; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movq %rdx, %rbx
; CHECK-NEXT: testq %rsi, %rsi
@@ -351,6 +351,7 @@ define ptr @strcpy_illegal_tailc(ptr %dest, i64 %sz, ptr readonly returned %src)
; CHECK-NEXT: movq %rbx, %rax
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
+entry:
%cmp = icmp eq i64 %sz, 0
br i1 %cmp, label %return, label %if.then
@@ -362,8 +363,63 @@ return:
ret ptr %src
}
+@i = global i32 0, align 4
+
+define i32 @undef_tailc() nounwind {
+; CHECK-LABEL: undef_tailc:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: cmpl $0, _i(%rip)
+; CHECK-NEXT: jne _qux ## TAILCALL
+; CHECK-NEXT: ## %bb.1: ## %return
+; CHECK-NEXT: retq
+entry:
+ %val = load i32, ptr @i, align 4
+ %cmp = icmp eq i32 %val, 0
+ br i1 %cmp, label %return, label %if.then
+
+if.then:
+ %rv_unused = tail call i32 @qux()
+ br label %return
+
+return:
+ ret i32 undef
+}
+
+define i32 @undef_and_known_tailc() nounwind {
+; CHECK-LABEL: undef_and_known_tailc:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: movl _i(%rip), %eax
+; CHECK-NEXT: cmpl $5, %eax
+; CHECK-NEXT: je _qux ## TAILCALL
+; CHECK-NEXT: ## %bb.1: ## %entry
+; CHECK-NEXT: cmpl $2, %eax
+; CHECK-NEXT: je _quux ## TAILCALL
+; CHECK-NEXT: ## %bb.2: ## %return
+; CHECK-NEXT: retq
+entry:
+ %val = load i32, ptr @i, align 4
+ switch i32 %val, label %return [
+ i32 2, label %case_2
+ i32 5, label %case_5
+ ]
+
+case_2:
+ %rv_unused = tail call i32 @quux()
+ br label %return
+
+case_5:
+ %rv = tail call i32 @qux()
+ br label %return
+
+return:
+ %phi = phi i32 [ undef, %case_2 ], [ %rv, %case_5 ], [ undef, %entry ]
+ ret i32 %phi
+}
+
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1)
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1)
declare noalias ptr @malloc(i64)
declare ptr @strcpy(ptr noalias returned writeonly, ptr noalias nocapture readonly)
declare ptr @baz(ptr, ptr)
+declare i32 @qux()
+declare i32 @quux()
More information about the llvm-commits
mailing list