[llvm] [CGP] Do not fold ret value if constant in `dupRetToEnableTailCallOpts` (PR #76613)

Antonio Frighetto via llvm-commits llvm-commits at lists.llvm.org
Sat Dec 30 06:57:13 PST 2023


https://github.com/antoniofrighetto updated https://github.com/llvm/llvm-project/pull/76613

>From 183eb5bb83150c0e3b253af9db2f3ca41cd36f30 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Sat, 30 Dec 2023 00:21:12 +0100
Subject: [PATCH 1/2] [CGP] Precommit tests for PR76613 (NFC)

---
 llvm/test/CodeGen/X86/tailcall-cgp-dup.ll | 94 +++++++++++++++++++++++
 1 file changed, 94 insertions(+)

diff --git a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
index 48440558283d45..af2fec4b5dffcf 100644
--- a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
+++ b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
@@ -184,3 +184,97 @@ return:
   %retval = phi ptr [ %ptr, %if.then ], [ %obj, %entry ]
   ret ptr %retval
 }
+
+define noundef ptr @memset_tailc(ptr noundef %0, i64 noundef %1) {
+; CHECK-LABEL: memset_tailc:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbx, -16
+; CHECK-NEXT:    movq %rdi, %rbx
+; CHECK-NEXT:    testq %rdi, %rdi
+; CHECK-NEXT:    je LBB4_2
+; CHECK-NEXT:  ## %bb.1:
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    movq %rbx, %rdi
+; CHECK-NEXT:    xorl %esi, %esi
+; CHECK-NEXT:    callq _memset
+; CHECK-NEXT:  LBB4_2:
+; CHECK-NEXT:    movq %rbx, %rax
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    retq
+  %3 = icmp eq ptr %0, null
+  br i1 %3, label %5, label %4
+
+4:
+  tail call void @llvm.memset.p0.i64(ptr nonnull align 1 %0, i8 0, i64 %1, i1 false)
+  br label %5
+
+5:
+  ret ptr %0
+}
+
+define noundef ptr @memcpy_tailc(ptr noundef %0, i64 noundef %1, ptr noundef %2) {
+; CHECK-LABEL: memcpy_tailc:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbx, -16
+; CHECK-NEXT:    testq %rsi, %rsi
+; CHECK-NEXT:    je LBB5_1
+; CHECK-NEXT:  ## %bb.2:
+; CHECK-NEXT:    movq %rsi, %rax
+; CHECK-NEXT:    movq %rdi, %rbx
+; CHECK-NEXT:    movq %rdx, %rsi
+; CHECK-NEXT:    movq %rax, %rdx
+; CHECK-NEXT:    callq _memcpy
+; CHECK-NEXT:    jmp LBB5_3
+; CHECK-NEXT:  LBB5_1:
+; CHECK-NEXT:    movq %rdx, %rbx
+; CHECK-NEXT:  LBB5_3:
+; CHECK-NEXT:    movq %rbx, %rax
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    retq
+  %4 = icmp eq i64 %1, 0
+  br i1 %4, label %6, label %5
+
+5:
+  tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 %0, ptr align 1 %2, i64 %1, i1 false)
+  br label %6
+
+6:
+  %7 = phi ptr [ %0, %5 ], [ %2, %3 ]
+  ret ptr %7
+}
+
+define noundef ptr @not_to_tailc(ptr noundef %0, i64 noundef %1, ptr noundef %2) {
+; CHECK-LABEL: not_to_tailc:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbx, -16
+; CHECK-NEXT:    movq %rdx, %rbx
+; CHECK-NEXT:    testq %rsi, %rsi
+; CHECK-NEXT:    je LBB6_2
+; CHECK-NEXT:  ## %bb.1:
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    movq %rbx, %rsi
+; CHECK-NEXT:    callq _baz
+; CHECK-NEXT:  LBB6_2:
+; CHECK-NEXT:    movq %rbx, %rax
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    retq
+  %4 = icmp eq i64 %1, 0
+  br i1 %4, label %6, label %5
+
+5:
+  tail call void @baz(ptr noundef %0, ptr noundef %2, i64 noundef %1)
+  br label %6
+
+6:
+  ret ptr %2
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #1
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
+declare void @baz(ptr noundef, ptr noundef, i64 noundef)

>From 12fe22e1dfa71d0bd5fd1dd225b58be3d7c8de77 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Sat, 30 Dec 2023 00:25:23 +0100
Subject: [PATCH 2/2] [CGP] Do not fold ret value if constant in
 `dupRetToEnableTailCallOpts`

Unless the returned value is a constant -- in which case folding it may
result in duplicated code later in codegen -- there is no reason to
refrain from hinting further tail call optimization opportunities. Also
consider the cases in which an incoming value of the phi-node does not
come directly from a call instruction, yet the predecessor block ends
with a call followed by an unconditional branch and may still be
simplified.

Fixes: https://github.com/llvm/llvm-project/issues/75455.
---
 llvm/lib/CodeGen/CodeGenPrepare.cpp           |  14 +-
 .../CodeGen/Thumb2/constant-islands-cbz.ll    |  18 ++-
 .../X86/tail-dup-merge-loop-headers.ll        | 129 +++++++++---------
 llvm/test/CodeGen/X86/tailcall-cgp-dup.ll     |  28 +---
 llvm/test/CodeGen/X86/x86-shrink-wrapping.ll  |   4 +-
 .../X86/live-debug-values-expr-conflict.ll    |  16 +--
 6 files changed, 99 insertions(+), 110 deletions(-)

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 6e99fb133e26a9..90d6818e1e9610 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2520,6 +2520,9 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
   BitCastInst *BCI = nullptr;
   Value *V = RetI->getReturnValue();
   if (V) {
+    if (isa<Constant>(V))
+      return false;
+
     BCI = dyn_cast<BitCastInst>(V);
     if (BCI)
       V = BCI->getOperand(0);
@@ -2532,8 +2535,6 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
     }
 
     PN = dyn_cast<PHINode>(V);
-    if (!PN)
-      return false;
   }
 
   if (PN && PN->getParent() != BB)
@@ -2569,8 +2570,15 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
       Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
       CallInst *CI = dyn_cast<CallInst>(IncomingVal);
       BasicBlock *PredBB = PN->getIncomingBlock(I);
+      // Do we have a call whose return value may have been optimized out (e.g.
+      // __builtin_memset) and an unconditional jump as terminator eligible to
+      // be folded?
+      if (!CI && PredBB && PredBB->getSingleSuccessor() == BB)
+        CI = dyn_cast_or_null<CallInst>(
+            PredBB->getTerminator()->getPrevNonDebugInstruction(true));
+
       // Make sure the phi value is indeed produced by the tail call.
-      if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
+      if (CI && CI->getParent() == PredBB && CI->getNumUses() <= 1 &&
           TLI->mayBeEmittedAsTailCall(CI) &&
           attributesPermitTailCall(F, CI, RetI, *TLI))
         TailCallBBs.push_back(PredBB);
diff --git a/llvm/test/CodeGen/Thumb2/constant-islands-cbz.ll b/llvm/test/CodeGen/Thumb2/constant-islands-cbz.ll
index fc42d4e0e4d8c0..6e5cf4d335d94e 100644
--- a/llvm/test/CodeGen/Thumb2/constant-islands-cbz.ll
+++ b/llvm/test/CodeGen/Thumb2/constant-islands-cbz.ll
@@ -8,12 +8,11 @@ define ptr @test(ptr returned %this, i32 %event_size, ptr %event_pointer) {
 ; CHECK-T1-NEXT:    .save {r4, lr}
 ; CHECK-T1-NEXT:    push {r4, lr}
 ; CHECK-T1-NEXT:    mov r4, r0
-; CHECK-T1-NEXT:    movs r0, #0
-; CHECK-T1-NEXT:    str r0, [r4, #4]
-; CHECK-T1-NEXT:    str r0, [r4, #8]
-; CHECK-T1-NEXT:    str r0, [r4, #12]
-; CHECK-T1-NEXT:    str r0, [r4, #16]
-; CHECK-T1-NEXT:    mov r0, r4
+; CHECK-T1-NEXT:    movs r3, #0
+; CHECK-T1-NEXT:    str r3, [r0, #4]
+; CHECK-T1-NEXT:    str r3, [r0, #8]
+; CHECK-T1-NEXT:    str r3, [r0, #12]
+; CHECK-T1-NEXT:    str r3, [r0, #16]
 ; CHECK-T1-NEXT:    cbz r2, .LBB0_2
 ; CHECK-T1-NEXT:  @ %bb.1: @ %if.else
 ; CHECK-T1-NEXT:    bl equeue_create_inplace
@@ -28,11 +27,10 @@ define ptr @test(ptr returned %this, i32 %event_size, ptr %event_pointer) {
 ; CHECK-T2:       @ %bb.0: @ %entry
 ; CHECK-T2-NEXT:    .save {r4, lr}
 ; CHECK-T2-NEXT:    push {r4, lr}
+; CHECK-T2-NEXT:    movs r3, #0
 ; CHECK-T2-NEXT:    mov r4, r0
-; CHECK-T2-NEXT:    movs r0, #0
-; CHECK-T2-NEXT:    strd r0, r0, [r4, #4]
-; CHECK-T2-NEXT:    strd r0, r0, [r4, #12]
-; CHECK-T2-NEXT:    mov r0, r4
+; CHECK-T2-NEXT:    strd r3, r3, [r0, #4]
+; CHECK-T2-NEXT:    strd r3, r3, [r0, #12]
 ; CHECK-T2-NEXT:    cbz r2, .LBB0_2
 ; CHECK-T2-NEXT:  @ %bb.1: @ %if.else
 ; CHECK-T2-NEXT:    bl equeue_create_inplace
diff --git a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
index 937cc173e7faef..404b8b13328de7 100644
--- a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
+++ b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
@@ -95,102 +95,92 @@ define i32 @loop_shared_header(i8* %exe, i32 %exesz, i32 %headsize, i32 %min, i3
 ; CHECK-NEXT:    pushq %r12
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    movl $1, %ebx
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    jne .LBB1_24
+; CHECK-NEXT:    jne .LBB1_22
 ; CHECK-NEXT:  # %bb.1: # %if.end19
 ; CHECK-NEXT:    movl %esi, %ebp
-; CHECK-NEXT:    movq %rdi, %r15
-; CHECK-NEXT:    movl (%rax), %r13d
-; CHECK-NEXT:    leal (,%r13,4), %ebx
-; CHECK-NEXT:    movl %ebx, %r12d
+; CHECK-NEXT:    movq %rdi, %r14
+; CHECK-NEXT:    movl (%rax), %r12d
+; CHECK-NEXT:    leal (,%r12,4), %r13d
+; CHECK-NEXT:    movl %r13d, %r15d
 ; CHECK-NEXT:    movl $1, %esi
-; CHECK-NEXT:    movq %r12, %rdi
+; CHECK-NEXT:    movq %r15, %rdi
 ; CHECK-NEXT:    callq cli_calloc@PLT
 ; CHECK-NEXT:    testl %ebp, %ebp
-; CHECK-NEXT:    je .LBB1_23
+; CHECK-NEXT:    je .LBB1_22
 ; CHECK-NEXT:  # %bb.2: # %if.end19
-; CHECK-NEXT:    testl %r13d, %r13d
-; CHECK-NEXT:    je .LBB1_23
+; CHECK-NEXT:    testl %r12d, %r12d
+; CHECK-NEXT:    je .LBB1_22
 ; CHECK-NEXT:  # %bb.3: # %if.end19
-; CHECK-NEXT:    movq %rax, %r14
+; CHECK-NEXT:    movq %rax, %rbx
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    jne .LBB1_23
+; CHECK-NEXT:    jne .LBB1_22
 ; CHECK-NEXT:  # %bb.4: # %if.end19
-; CHECK-NEXT:    cmpq %r15, %r14
-; CHECK-NEXT:    jb .LBB1_23
+; CHECK-NEXT:    cmpq %r14, %rbx
+; CHECK-NEXT:    jb .LBB1_22
 ; CHECK-NEXT:  # %bb.5: # %if.end50
-; CHECK-NEXT:    movq %r14, %rdi
-; CHECK-NEXT:    movq %r12, %rdx
+; CHECK-NEXT:    movq %rbx, %rdi
+; CHECK-NEXT:    movq %r15, %rdx
 ; CHECK-NEXT:    callq memcpy@PLT
-; CHECK-NEXT:    cmpl $4, %ebx
-; CHECK-NEXT:    jb .LBB1_26
+; CHECK-NEXT:    cmpl $4, %r13d
+; CHECK-NEXT:    jb .LBB1_21
 ; CHECK-NEXT:  # %bb.6: # %shared_preheader
 ; CHECK-NEXT:    movb $32, %cl
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    jmp .LBB1_8
+; CHECK-NEXT:    jmp .LBB1_7
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB1_7: # %merge_predecessor_split
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:  .LBB1_17: # %merge_predecessor_split
+; CHECK-NEXT:    # in Loop: Header=BB1_7 Depth=1
 ; CHECK-NEXT:    movb $32, %cl
-; CHECK-NEXT:  .LBB1_8: # %outer_loop_header
+; CHECK-NEXT:  .LBB1_7: # %outer_loop_header
 ; CHECK-NEXT:    # =>This Loop Header: Depth=1
-; CHECK-NEXT:    # Child Loop BB1_9 Depth 2
-; CHECK-NEXT:    testl %r13d, %r13d
-; CHECK-NEXT:    je .LBB1_16
+; CHECK-NEXT:    # Child Loop BB1_11 Depth 2
+; CHECK-NEXT:    testl %r12d, %r12d
+; CHECK-NEXT:    je .LBB1_8
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB1_9: # %shared_loop_header
-; CHECK-NEXT:    # Parent Loop BB1_8 Depth=1
+; CHECK-NEXT:  .LBB1_11: # %shared_loop_header
+; CHECK-NEXT:    # Parent Loop BB1_7 Depth=1
 ; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    testq %r14, %r14
-; CHECK-NEXT:    jne .LBB1_25
-; CHECK-NEXT:  # %bb.10: # %inner_loop_body
-; CHECK-NEXT:    # in Loop: Header=BB1_9 Depth=2
+; CHECK-NEXT:    testq %rbx, %rbx
+; CHECK-NEXT:    jne .LBB1_20
+; CHECK-NEXT:  # %bb.12: # %inner_loop_body
+; CHECK-NEXT:    # in Loop: Header=BB1_11 Depth=2
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    je .LBB1_9
-; CHECK-NEXT:  # %bb.11: # %if.end96.i
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
-; CHECK-NEXT:    cmpl $3, %r13d
-; CHECK-NEXT:    jae .LBB1_20
-; CHECK-NEXT:  # %bb.12: # %if.end287.i
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:    je .LBB1_11
+; CHECK-NEXT:  # %bb.13: # %if.end96.i
+; CHECK-NEXT:    # in Loop: Header=BB1_7 Depth=1
+; CHECK-NEXT:    cmpl $3, %r12d
+; CHECK-NEXT:    jae .LBB1_14
+; CHECK-NEXT:  # %bb.15: # %if.end287.i
+; CHECK-NEXT:    # in Loop: Header=BB1_7 Depth=1
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    # implicit-def: $cl
-; CHECK-NEXT:    jne .LBB1_8
-; CHECK-NEXT:  # %bb.13: # %if.end308.i
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:    jne .LBB1_7
+; CHECK-NEXT:  # %bb.16: # %if.end308.i
+; CHECK-NEXT:    # in Loop: Header=BB1_7 Depth=1
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    je .LBB1_7
-; CHECK-NEXT:  # %bb.14: # %if.end335.i
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:    je .LBB1_17
+; CHECK-NEXT:  # %bb.18: # %if.end335.i
+; CHECK-NEXT:    # in Loop: Header=BB1_7 Depth=1
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    testb %cl, %cl
-; CHECK-NEXT:    jne .LBB1_8
-; CHECK-NEXT:  # %bb.15: # %merge_other
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:    jne .LBB1_7
+; CHECK-NEXT:  # %bb.19: # %merge_other
+; CHECK-NEXT:    # in Loop: Header=BB1_7 Depth=1
 ; CHECK-NEXT:    # implicit-def: $cl
-; CHECK-NEXT:    jmp .LBB1_8
-; CHECK-NEXT:  .LBB1_23:
-; CHECK-NEXT:    movl $1, %ebx
-; CHECK-NEXT:    jmp .LBB1_24
-; CHECK-NEXT:  .LBB1_16: # %while.cond.us1412.i
+; CHECK-NEXT:    jmp .LBB1_7
+; CHECK-NEXT:  .LBB1_8: # %while.cond.us1412.i
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    movl $1, %ebx
-; CHECK-NEXT:    jne .LBB1_18
-; CHECK-NEXT:  # %bb.17: # %while.cond.us1412.i
+; CHECK-NEXT:    jne .LBB1_10
+; CHECK-NEXT:  # %bb.9: # %while.cond.us1412.i
 ; CHECK-NEXT:    decb %cl
-; CHECK-NEXT:    jne .LBB1_24
-; CHECK-NEXT:  .LBB1_18: # %if.end41.us1436.i
-; CHECK-NEXT:  .LBB1_20: # %if.then99.i
-; CHECK-NEXT:    movq .str.6@GOTPCREL(%rip), %rdi
-; CHECK-NEXT:    xorl %ebx, %ebx
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    callq cli_dbgmsg@PLT
-; CHECK-NEXT:  .LBB1_24: # %cleanup
-; CHECK-NEXT:    movl %ebx, %eax
+; CHECK-NEXT:    je .LBB1_10
+; CHECK-NEXT:  .LBB1_22: # %cleanup
+; CHECK-NEXT:    movl $1, %eax
+; CHECK-NEXT:  .LBB1_23: # %cleanup
 ; CHECK-NEXT:    addq $8, %rsp
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r12
@@ -199,8 +189,15 @@ define i32 @loop_shared_header(i8* %exe, i32 %exesz, i32 %headsize, i32 %min, i3
 ; CHECK-NEXT:    popq %r15
 ; CHECK-NEXT:    popq %rbp
 ; CHECK-NEXT:    retq
-; CHECK-NEXT:  .LBB1_25: # %wunpsect.exit.thread.loopexit389
-; CHECK-NEXT:  .LBB1_26: # %wunpsect.exit.thread.loopexit391
+; CHECK-NEXT:  .LBB1_14: # %if.then99.i
+; CHECK-NEXT:    movq .str.6@GOTPCREL(%rip), %rdi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    callq cli_dbgmsg@PLT
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    jmp .LBB1_23
+; CHECK-NEXT:  .LBB1_20: # %wunpsect.exit.thread.loopexit389
+; CHECK-NEXT:  .LBB1_10: # %if.end41.us1436.i
+; CHECK-NEXT:  .LBB1_21: # %wunpsect.exit.thread.loopexit391
 entry:
   %0 = load i32, i32* undef, align 4
   %mul = shl nsw i32 %0, 2
diff --git a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
index af2fec4b5dffcf..971c3faefdd3cb 100644
--- a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
+++ b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
@@ -188,20 +188,14 @@ return:
 define noundef ptr @memset_tailc(ptr noundef %0, i64 noundef %1) {
 ; CHECK-LABEL: memset_tailc:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset %rbx, -16
-; CHECK-NEXT:    movq %rdi, %rbx
 ; CHECK-NEXT:    testq %rdi, %rdi
-; CHECK-NEXT:    je LBB4_2
-; CHECK-NEXT:  ## %bb.1:
+; CHECK-NEXT:    je LBB4_1
+; CHECK-NEXT:  ## %bb.2:
 ; CHECK-NEXT:    movq %rsi, %rdx
-; CHECK-NEXT:    movq %rbx, %rdi
 ; CHECK-NEXT:    xorl %esi, %esi
-; CHECK-NEXT:    callq _memset
-; CHECK-NEXT:  LBB4_2:
-; CHECK-NEXT:    movq %rbx, %rax
-; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    jmp _memset ## TAILCALL
+; CHECK-NEXT:  LBB4_1:
+; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    retq
   %3 = icmp eq ptr %0, null
   br i1 %3, label %5, label %4
@@ -217,23 +211,15 @@ define noundef ptr @memset_tailc(ptr noundef %0, i64 noundef %1) {
 define noundef ptr @memcpy_tailc(ptr noundef %0, i64 noundef %1, ptr noundef %2) {
 ; CHECK-LABEL: memcpy_tailc:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset %rbx, -16
 ; CHECK-NEXT:    testq %rsi, %rsi
 ; CHECK-NEXT:    je LBB5_1
 ; CHECK-NEXT:  ## %bb.2:
 ; CHECK-NEXT:    movq %rsi, %rax
-; CHECK-NEXT:    movq %rdi, %rbx
 ; CHECK-NEXT:    movq %rdx, %rsi
 ; CHECK-NEXT:    movq %rax, %rdx
-; CHECK-NEXT:    callq _memcpy
-; CHECK-NEXT:    jmp LBB5_3
+; CHECK-NEXT:    jmp _memcpy ## TAILCALL
 ; CHECK-NEXT:  LBB5_1:
-; CHECK-NEXT:    movq %rdx, %rbx
-; CHECK-NEXT:  LBB5_3:
-; CHECK-NEXT:    movq %rbx, %rax
-; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    movq %rdx, %rax
 ; CHECK-NEXT:    retq
   %4 = icmp eq i64 %1, 0
   br i1 %4, label %6, label %5
diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
index fab3847b3a2c51..eb2e00fb90429c 100644
--- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
@@ -477,8 +477,8 @@ define i32 @inlineAsm(i32 %cond, i32 %N) {
 ; ENABLE-NEXT:    popq %rbx
 ; ENABLE-NEXT:    retq
 ; ENABLE-NEXT:  LBB6_4: ## %if.else
+; ENABLE-NEXT:    addl %esi, %esi
 ; ENABLE-NEXT:    movl %esi, %eax
-; ENABLE-NEXT:    addl %esi, %eax
 ; ENABLE-NEXT:    retq
 ;
 ; DISABLE-LABEL: inlineAsm:
@@ -509,8 +509,8 @@ define i32 @inlineAsm(i32 %cond, i32 %N) {
 ; DISABLE-NEXT:    popq %rbx
 ; DISABLE-NEXT:    retq
 ; DISABLE-NEXT:  LBB6_4: ## %if.else
+; DISABLE-NEXT:    addl %esi, %esi
 ; DISABLE-NEXT:    movl %esi, %eax
-; DISABLE-NEXT:    addl %esi, %eax
 ; DISABLE-NEXT:    popq %rbx
 ; DISABLE-NEXT:    retq
 entry:
diff --git a/llvm/test/DebugInfo/X86/live-debug-values-expr-conflict.ll b/llvm/test/DebugInfo/X86/live-debug-values-expr-conflict.ll
index afce27ee4abb9b..2769d39fa54aa2 100644
--- a/llvm/test/DebugInfo/X86/live-debug-values-expr-conflict.ll
+++ b/llvm/test/DebugInfo/X86/live-debug-values-expr-conflict.ll
@@ -24,18 +24,18 @@
 ; one in the block two, and none in block three.
 ; CHECK:       ![[BAZVAR:[0-9]+]] = !DILocalVariable(name: "baz",
 ; CHECK-LABEL: bb.0.entry:
-; CHECK:       DBG_VALUE {{[0-9a-zA-Z$%_]*}}, $noreg, ![[BAZVAR]], 
+; CHECK:       DBG_VALUE {{[0-9a-zA-Z$%_]*}}, $noreg, ![[BAZVAR]], 
 ; CHECK-SAME:     !DIExpression()
 ; CHECK-LABEL: bb.1.if.then:
-; CHECK-LABEL: bb.2.if.else:
-; CHECK:       DBG_VALUE {{[0-9a-zA-Z$%_]*}}, $noreg, ![[BAZVAR]], 
+; CHECK-LABEL: bb.3.if.else:
+; CHECK:       DBG_VALUE {{[0-9a-zA-Z$%_]*}}, $noreg, ![[BAZVAR]], 
 ; CHECK-SAME:     !DIExpression()
-; CHECK:       DBG_VALUE_LIST ![[BAZVAR]], 
-; CHECK-SAME:     !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_plus_uconst, 1, DW_OP_stack_value)
-; CHECK-SAME:     {{[0-9a-zA-Z$%_]*}}
-; CHECK-LABEL: bb.3.if.end:
+; CHECK:       DBG_VALUE_LIST ![[BAZVAR]], 
+; CHECK-SAME:     !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_plus_uconst, 1, DW_OP_stack_value)
+; CHECK-SAME:     {{[0-9a-zA-Z$%_]*}}
+; CHECK-LABEL: bb.2.if.then:
 ; CHECK-NOT:   DBG_VALUE
-; CHECK-NOT:   DBG_VALUE_LIST
+; CHECK-NOT:   DBG_VALUE_LIST
 
 declare void @escape1(i32)
 declare void @escape2(i32)



More information about the llvm-commits mailing list