[llvm] [CGP] Consider arguments and ret values in `dupRetToEnableTailCallOpts` (PR #76613)

Antonio Frighetto via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 17 09:55:00 PST 2024

https://github.com/antoniofrighetto updated https://github.com/llvm/llvm-project/pull/76613

>From 9fbef3ed5c7dfac006ece77236db9c436df5fa9b Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Sat, 30 Dec 2023 00:21:12 +0100
Subject: [PATCH 1/2] [CGP] Precommit tests for PR76613 (NFC)

 llvm/test/CodeGen/X86/tailcall-cgp-dup.ll | 133 +++++++++++++++++++++-
 1 file changed, 132 insertions(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
index 75bbae1050d61c..8417c94c7dae61 100644
--- a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
+++ b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "cfi"
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
 ; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' %s -mtriple=x86_64-apple-darwin -o - | FileCheck %s --check-prefix OPT
@@ -184,3 +184,134 @@ return:
   %retval = phi ptr [ %ptr, %if.then ], [ %obj, %entry ]
   ret ptr %retval
+define ptr @memset_tailc(ptr %ret_val, i64 %sz) {
+; CHECK-LABEL: memset_tailc:
+; CHECK:  ## %bb.0: ## %entry
+; CHECK:    pushq %rbx
+; CHECK:    movq %rdi, %rbx
+; CHECK:    testq %rdi, %rdi
+; CHECK:    je LBB4_2
+; CHECK:  ## %bb.1: ## %if.then
+; CHECK:    movq %rsi, %rdx
+; CHECK:    movq %rbx, %rdi
+; CHECK:    xorl %esi, %esi
+; CHECK:    callq _memset
+; CHECK:  LBB4_2: ## %return
+; CHECK:    movq %rbx, %rax
+; CHECK:    popq %rbx
+; CHECK:    retq
+  %cmp = icmp eq ptr %ret_val, null
+  br i1 %cmp, label %return, label %if.then
+  tail call void @llvm.memset.p0.i64(ptr nonnull align 1 %ret_val, i8 0, i64 %sz, i1 false)
+  br label %return
+  ret ptr %ret_val
+define ptr @memcpy_tailc(ptr %ret_val, i64 %sz, ptr %src) {
+; CHECK-LABEL: memcpy_tailc:
+; CHECK:  ## %bb.0: ## %entry
+; CHECK:    pushq %rbx
+; CHECK:    testq %rsi, %rsi
+; CHECK:    je LBB5_1
+; CHECK:  ## %bb.2: ## %if.then
+; CHECK:    movq %rsi, %rax
+; CHECK:    movq %rdi, %rbx
+; CHECK:    movq %rdx, %rsi
+; CHECK:    movq %rax, %rdx
+; CHECK:    callq _memcpy
+; CHECK:    jmp LBB5_3
+; CHECK:  LBB5_1:
+; CHECK:    movq %rdx, %rbx
+; CHECK:  LBB5_3: ## %return
+; CHECK:    movq %rbx, %rax
+; CHECK:    popq %rbx
+; CHECK:    retq
+  %cmp = icmp eq i64 %sz, 0
+  br i1 %cmp, label %return, label %if.then
+  tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 %ret_val, ptr align 1 %src, i64 %sz, i1 false)
+  br label %return
+  %phi = phi ptr [ %ret_val, %if.then ], [ %src, %entry ]
+  ret ptr %phi
+define ptr @strcpy_tailc(i64 %0, ptr %src) {
+; CHECK-LABEL: strcpy_tailc:
+; CHECK:  ## %bb.0: ## %entry
+; CHECK:    pushq %r14
+; CHECK:    pushq %rbx
+; CHECK:    pushq %rax
+; CHECK:    movq %rsi, %rbx
+; CHECK:    movq %rdi, %r14
+; CHECK:    movl $20, %edi
+; CHECK:    callq _malloc
+; CHECK:    testq %r14, %r14
+; CHECK:    je LBB6_2
+; CHECK:  ## %bb.1: ## %if.then
+; CHECK:    movq %rax, %rdi
+; CHECK:    movq %rbx, %rsi
+; CHECK:    movq %rax, %rbx
+; CHECK:    callq _strcpy
+; CHECK:  LBB6_2: ## %return
+; CHECK:    movq %rbx, %rax
+; CHECK:    addq $8, %rsp
+; CHECK:    popq %rbx
+; CHECK:    popq %r14
+; CHECK:    retq
+  %ret_val = tail call noalias dereferenceable_or_null(20) ptr @malloc(i64 20)
+  %cmp = icmp eq i64 %0, 0
+  br i1 %cmp, label %return, label %if.then
+  %rv_unused = tail call ptr @strcpy(ptr dereferenceable(1) %ret_val, ptr dereferenceable(1) %src)
+  br label %return
+  %phi = phi ptr [ %ret_val, %if.then ], [ %src, %entry ]
+  ret ptr %phi
+define ptr @memset_not_to_tailc(ptr %arg, i64 %sz, ptr %ret_val_1, ptr %ret_val_2) {
+; CHECK-LABEL: memset_not_to_tailc:
+; CHECK:  ## %bb.0: ## %entry
+; CHECK:    movq %rdx, %rax
+; CHECK:    testq %rsi, %rsi
+; CHECK:    je LBB7_2
+; CHECK:  ## %bb.1: ## %if.then
+; CHECK:    pushq %rbx
+; CHECK:    movq %rcx, %rbx
+; CHECK:    movq %rsi, %rdx
+; CHECK:    xorl %esi, %esi
+; CHECK:    callq _memset
+; CHECK:    movq %rbx, %rax
+; CHECK:    popq %rbx
+; CHECK:  LBB7_2: ## %return
+; CHECK:    retq
+  %cmp = icmp eq i64 %sz, 0
+  br i1 %cmp, label %return, label %if.then
+  tail call void @llvm.memset.p0.i64(ptr align 1 %arg, i8 0, i64 %sz, i1 false)
+  br label %return
+  %phi = phi ptr [ %ret_val_2, %if.then ], [ %ret_val_1, %entry ]
+  ret ptr %phi
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
+declare noalias ptr @malloc(i64)
+declare ptr @strcpy(ptr noalias returned writeonly, ptr noalias nocapture readonly)

>From 6608bacd2208be3208fc7739b37c85066c3a79e9 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Sat, 30 Dec 2023 00:25:23 +0100
Subject: [PATCH 2/2] [CGP] Consider arguments and ret values in
 `dupRetToEnableTailCallOpts`

Hint further tail call optimization opportunities when the examined
returned value is either the return value of a call instruction, or
a function argument. Moreover, take into account the cases in which
incoming values from phi-nodes, not directly tail call instructions,
may still be simplified.

Fixes: https://github.com/llvm/llvm-project/issues/75455.
 llvm/lib/CodeGen/CodeGenPrepare.cpp           | 37 ++++++++++++++++--
 .../CodeGen/Thumb2/constant-islands-cbz.ll    | 18 ++++-----
 llvm/test/CodeGen/X86/tailcall-cgp-dup.ll     | 38 ++++++++-----------
 .../X86/live-debug-values-expr-conflict.ll    | 16 ++++----
 4 files changed, 65 insertions(+), 44 deletions(-)

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index ff61f1a4a0436d..6f5e7d6c7b4066 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2524,7 +2524,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
 /// Look for opportunities to duplicate return instructions to the predecessor
-/// to enable tail call optimizations. The case it is currently looking for is:
+/// to enable tail call optimizations. The cases it currently looks for are
+/// simple returns of call values, function arguments, or phi nodes as follows:
 /// @code
 /// bb0:
 ///   %tmp0 = tail call i32 @f0()
@@ -2581,7 +2582,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
     PN = dyn_cast<PHINode>(V);
-    if (!PN)
+    if (!PN && !isa<Argument>(V) && !isa<CallInst>(V))
       return false;
@@ -2621,8 +2622,38 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
       // Make sure the phi value is indeed produced by the tail call.
       if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
           TLI->mayBeEmittedAsTailCall(CI) &&
-          attributesPermitTailCall(F, CI, RetI, *TLI))
+          attributesPermitTailCall(F, CI, RetI, *TLI)) {
+      } else {
+        /// Consider the cases in which the phi value is indirectly produced by
+        /// the tail call, for example when encountering memset(), memmove(),
+        /// whose return value may have been optimized out.
+        /// @code
+        /// bb0:
+        ///   tail call void @llvm.memset.p0.i64()
+        ///   br label %return
+        /// @endcode
+        if (PredBB && PredBB->getSingleSuccessor() == BB)
+          CI = dyn_cast_or_null<CallInst>(
+              PredBB->getTerminator()->getPrevNonDebugInstruction(true));
+        // If we return void from the call site, it must be an intrinsic.
+        if (CI && CI->getType()->isVoidTy() && !isa<IntrinsicInst>(CI))
+          continue;
+        /// If we return a value, it must not be used. A valid case may be when
+        /// strcpy()'ing, where its first argument is used in the phi-node.
+        /// @code
+        /// bb0:
+        ///   %1 = tail call ptr @strcpy()
+        ///   br label %return
+        /// @endcode
+        if (CI && CI->use_empty() && CI->hasArgument(IncomingVal) &&
+            IncomingVal == CI->getArgOperand(0) &&
+            TLI->mayBeEmittedAsTailCall(CI) &&
+            attributesPermitTailCall(F, CI, RetI, *TLI))
+          TailCallBBs.push_back(PredBB);
+      }
   } else {
     SmallPtrSet<BasicBlock *, 4> VisitedBBs;
diff --git a/llvm/test/CodeGen/Thumb2/constant-islands-cbz.ll b/llvm/test/CodeGen/Thumb2/constant-islands-cbz.ll
index fc42d4e0e4d8c0..6e5cf4d335d94e 100644
--- a/llvm/test/CodeGen/Thumb2/constant-islands-cbz.ll
+++ b/llvm/test/CodeGen/Thumb2/constant-islands-cbz.ll
@@ -8,12 +8,11 @@ define ptr @test(ptr returned %this, i32 %event_size, ptr %event_pointer) {
 ; CHECK-T1-NEXT:    .save {r4, lr}
 ; CHECK-T1-NEXT:    push {r4, lr}
 ; CHECK-T1-NEXT:    mov r4, r0
-; CHECK-T1-NEXT:    movs r0, #0
-; CHECK-T1-NEXT:    str r0, [r4, #4]
-; CHECK-T1-NEXT:    str r0, [r4, #8]
-; CHECK-T1-NEXT:    str r0, [r4, #12]
-; CHECK-T1-NEXT:    str r0, [r4, #16]
-; CHECK-T1-NEXT:    mov r0, r4
+; CHECK-T1-NEXT:    movs r3, #0
+; CHECK-T1-NEXT:    str r3, [r0, #4]
+; CHECK-T1-NEXT:    str r3, [r0, #8]
+; CHECK-T1-NEXT:    str r3, [r0, #12]
+; CHECK-T1-NEXT:    str r3, [r0, #16]
 ; CHECK-T1-NEXT:    cbz r2, .LBB0_2
 ; CHECK-T1-NEXT:  @ %bb.1: @ %if.else
 ; CHECK-T1-NEXT:    bl equeue_create_inplace
@@ -28,11 +27,10 @@ define ptr @test(ptr returned %this, i32 %event_size, ptr %event_pointer) {
 ; CHECK-T2:       @ %bb.0: @ %entry
 ; CHECK-T2-NEXT:    .save {r4, lr}
 ; CHECK-T2-NEXT:    push {r4, lr}
+; CHECK-T2-NEXT:    movs r3, #0
 ; CHECK-T2-NEXT:    mov r4, r0
-; CHECK-T2-NEXT:    movs r0, #0
-; CHECK-T2-NEXT:    strd r0, r0, [r4, #4]
-; CHECK-T2-NEXT:    strd r0, r0, [r4, #12]
-; CHECK-T2-NEXT:    mov r0, r4
+; CHECK-T2-NEXT:    strd r3, r3, [r0, #4]
+; CHECK-T2-NEXT:    strd r3, r3, [r0, #12]
 ; CHECK-T2-NEXT:    cbz r2, .LBB0_2
 ; CHECK-T2-NEXT:  @ %bb.1: @ %if.else
 ; CHECK-T2-NEXT:    bl equeue_create_inplace
diff --git a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
index 8417c94c7dae61..15bb9834a1aa3c 100644
--- a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
+++ b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
@@ -188,18 +188,14 @@ return:
 define ptr @memset_tailc(ptr %ret_val, i64 %sz) {
 ; CHECK-LABEL: memset_tailc:
 ; CHECK:  ## %bb.0: ## %entry
-; CHECK:    pushq %rbx
-; CHECK:    movq %rdi, %rbx
 ; CHECK:    testq %rdi, %rdi
-; CHECK:    je LBB4_2
-; CHECK:  ## %bb.1: ## %if.then
+; CHECK:    je LBB4_1
+; CHECK:  ## %bb.2: ## %if.then
 ; CHECK:    movq %rsi, %rdx
-; CHECK:    movq %rbx, %rdi
 ; CHECK:    xorl %esi, %esi
-; CHECK:    callq _memset
-; CHECK:  LBB4_2: ## %return
-; CHECK:    movq %rbx, %rax
-; CHECK:    popq %rbx
+; CHECK:    jmp _memset ## TAILCALL
+; CHECK:  LBB4_1: ## %return
+; CHECK:    movq %rdi, %rax
 ; CHECK:    retq
   %cmp = icmp eq ptr %ret_val, null
@@ -216,21 +212,15 @@ return:
 define ptr @memcpy_tailc(ptr %ret_val, i64 %sz, ptr %src) {
 ; CHECK-LABEL: memcpy_tailc:
 ; CHECK:  ## %bb.0: ## %entry
-; CHECK:    pushq %rbx
 ; CHECK:    testq %rsi, %rsi
 ; CHECK:    je LBB5_1
 ; CHECK:  ## %bb.2: ## %if.then
 ; CHECK:    movq %rsi, %rax
-; CHECK:    movq %rdi, %rbx
 ; CHECK:    movq %rdx, %rsi
 ; CHECK:    movq %rax, %rdx
-; CHECK:    callq _memcpy
-; CHECK:    jmp LBB5_3
-; CHECK:  LBB5_1:
-; CHECK:    movq %rdx, %rbx
-; CHECK:  LBB5_3: ## %return
-; CHECK:    movq %rbx, %rax
-; CHECK:    popq %rbx
+; CHECK:    jmp _memcpy ## TAILCALL
+; CHECK:  LBB5_1: ## %return
+; CHECK:    movq %rdx, %rax
 ; CHECK:    retq
   %cmp = icmp eq i64 %sz, 0
@@ -256,13 +246,15 @@ define ptr @strcpy_tailc(i64 %0, ptr %src) {
 ; CHECK:    movl $20, %edi
 ; CHECK:    callq _malloc
 ; CHECK:    testq %r14, %r14
-; CHECK:    je LBB6_2
-; CHECK:  ## %bb.1: ## %if.then
+; CHECK:    je LBB6_1
+; CHECK:  ## %bb.2: ## %if.then
 ; CHECK:    movq %rax, %rdi
 ; CHECK:    movq %rbx, %rsi
-; CHECK:    movq %rax, %rbx
-; CHECK:    callq _strcpy
-; CHECK:  LBB6_2: ## %return
+; CHECK:    addq $8, %rsp
+; CHECK:    popq %rbx
+; CHECK:    popq %r14
+; CHECK:    jmp _strcpy ## TAILCALL
+; CHECK:  LBB6_1: ## %return
 ; CHECK:    movq %rbx, %rax
 ; CHECK:    addq $8, %rsp
 ; CHECK:    popq %rbx
diff --git a/llvm/test/DebugInfo/X86/live-debug-values-expr-conflict.ll b/llvm/test/DebugInfo/X86/live-debug-values-expr-conflict.ll
index afce27ee4abb9b..2769d39fa54aa2 100644
--- a/llvm/test/DebugInfo/X86/live-debug-values-expr-conflict.ll
+++ b/llvm/test/DebugInfo/X86/live-debug-values-expr-conflict.ll
@@ -24,18 +24,18 @@
 ; one in the block two, and none in block three.
 ; CHECK:       ![[BAZVAR:[0-9]+]] = !DILocalVariable(name: "baz",
 ; CHECK-LABEL: bb.0.entry:
-; CHECK:       DBG_VALUE {{[0-9a-zA-Z$%_]*}}, $noreg, ![[BAZVAR]], 
+; CHECK:       DBG_VALUE {{[0-9a-zA-Z$%_]*}}, $noreg, ![[BAZVAR]], 
 ; CHECK-SAME:     !DIExpression()
 ; CHECK-LABEL: bb.1.if.then:
-; CHECK-LABEL: bb.2.if.else:
-; CHECK:       DBG_VALUE {{[0-9a-zA-Z$%_]*}}, $noreg, ![[BAZVAR]], 
+; CHECK-LABEL: bb.3.if.else:
+; CHECK:       DBG_VALUE {{[0-9a-zA-Z$%_]*}}, $noreg, ![[BAZVAR]], 
 ; CHECK-SAME:     !DIExpression()
-; CHECK-SAME:     !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_plus_uconst, 1, DW_OP_stack_value)
-; CHECK-SAME:     {{[0-9a-zA-Z$%_]*}}
-; CHECK-LABEL: bb.3.if.end:
+; CHECK-SAME:     !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_plus_uconst, 1, DW_OP_stack_value)
+; CHECK-SAME:     {{[0-9a-zA-Z$%_]*}}
+; CHECK-LABEL: bb.2.if.then:
 declare void @escape1(i32)
 declare void @escape2(i32)

More information about the llvm-commits mailing list