[llvm] [CGP] Do not fold ret value if constant in `dupRetToEnableTailCallOpts` (PR #76613)

Antonio Frighetto via llvm-commits llvm-commits at lists.llvm.org
Sat Dec 30 05:03:23 PST 2023


https://github.com/antoniofrighetto updated https://github.com/llvm/llvm-project/pull/76613

>From 6775eaa0bf54520086236d263171289bf42785ac Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Sat, 30 Dec 2023 00:21:12 +0100
Subject: [PATCH 1/2] [CGP] Precommit tests for PR76613 (NFC)

---
 llvm/test/CodeGen/X86/tailcall-cgp-dup.ll | 94 +++++++++++++++++++++++
 1 file changed, 94 insertions(+)

diff --git a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
index 48440558283d45..af2fec4b5dffcf 100644
--- a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
+++ b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
@@ -184,3 +184,97 @@ return:
   %retval = phi ptr [ %ptr, %if.then ], [ %obj, %entry ]
   ret ptr %retval
 }
+
+define noundef ptr @memset_tailc(ptr noundef %0, i64 noundef %1) {
+; CHECK-LABEL: memset_tailc:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbx, -16
+; CHECK-NEXT:    movq %rdi, %rbx
+; CHECK-NEXT:    testq %rdi, %rdi
+; CHECK-NEXT:    je LBB4_2
+; CHECK-NEXT:  ## %bb.1:
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    movq %rbx, %rdi
+; CHECK-NEXT:    xorl %esi, %esi
+; CHECK-NEXT:    callq _memset
+; CHECK-NEXT:  LBB4_2:
+; CHECK-NEXT:    movq %rbx, %rax
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    retq
+  %3 = icmp eq ptr %0, null
+  br i1 %3, label %5, label %4
+
+4:
+  tail call void @llvm.memset.p0.i64(ptr nonnull align 1 %0, i8 0, i64 %1, i1 false)
+  br label %5
+
+5:
+  ret ptr %0
+}
+
+define noundef ptr @memcpy_tailc(ptr noundef %0, i64 noundef %1, ptr noundef %2) {
+; CHECK-LABEL: memcpy_tailc:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbx, -16
+; CHECK-NEXT:    testq %rsi, %rsi
+; CHECK-NEXT:    je LBB5_1
+; CHECK-NEXT:  ## %bb.2:
+; CHECK-NEXT:    movq %rsi, %rax
+; CHECK-NEXT:    movq %rdi, %rbx
+; CHECK-NEXT:    movq %rdx, %rsi
+; CHECK-NEXT:    movq %rax, %rdx
+; CHECK-NEXT:    callq _memcpy
+; CHECK-NEXT:    jmp LBB5_3
+; CHECK-NEXT:  LBB5_1:
+; CHECK-NEXT:    movq %rdx, %rbx
+; CHECK-NEXT:  LBB5_3:
+; CHECK-NEXT:    movq %rbx, %rax
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    retq
+  %4 = icmp eq i64 %1, 0
+  br i1 %4, label %6, label %5
+
+5:
+  tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 %0, ptr align 1 %2, i64 %1, i1 false)
+  br label %6
+
+6:
+  %7 = phi ptr [ %0, %5 ], [ %2, %3 ]
+  ret ptr %7
+}
+
+define noundef ptr @not_to_tailc(ptr noundef %0, i64 noundef %1, ptr noundef %2) {
+; CHECK-LABEL: not_to_tailc:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbx, -16
+; CHECK-NEXT:    movq %rdx, %rbx
+; CHECK-NEXT:    testq %rsi, %rsi
+; CHECK-NEXT:    je LBB6_2
+; CHECK-NEXT:  ## %bb.1:
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    movq %rbx, %rsi
+; CHECK-NEXT:    callq _baz
+; CHECK-NEXT:  LBB6_2:
+; CHECK-NEXT:    movq %rbx, %rax
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    retq
+  %4 = icmp eq i64 %1, 0
+  br i1 %4, label %6, label %5
+
+5:
+  tail call void @baz(ptr noundef %0, ptr noundef %2, i64 noundef %1)
+  br label %6
+
+6:
+  ret ptr %2
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #1
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
+declare void @baz(ptr noundef, ptr noundef, i64 noundef)

>From 08abfe9d91f154828be7c0734ea9d867071b125b Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Sat, 30 Dec 2023 00:25:23 +0100
Subject: [PATCH 2/2] [CGP] Do not fold ret value if constant in
 `dupRetToEnableTailCallOpts`

Unless the returned value is a constant, in which case duplicating the
return may result in duplicated code later in codegen, there is no reason
to refrain from exposing further tail call optimization opportunities.
Also handle the case in which an incoming value of the phi node does not
come directly from a call instruction, but the predecessor block still
ends in a call, followed by an unconditional branch, that may be folded.

Fixes: https://github.com/llvm/llvm-project/issues/75455.
---
 llvm/lib/CodeGen/CodeGenPrepare.cpp           |  12 +-
 .../CodeGen/Thumb2/constant-islands-cbz.ll    |  18 ++-
 .../X86/tail-dup-merge-loop-headers.ll        | 129 +++++++++---------
 llvm/test/CodeGen/X86/tailcall-cgp-dup.ll     |  28 +---
 llvm/test/CodeGen/X86/x86-shrink-wrapping.ll  |   4 +-
 .../X86/live-debug-values-expr-conflict.ll    |  16 +--
 6 files changed, 98 insertions(+), 109 deletions(-)

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 6e99fb133e26a9..8c28333ac532d9 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2532,7 +2532,8 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
     }
 
     PN = dyn_cast<PHINode>(V);
-    if (!PN)
+
+    if (isa<Constant>(V))
       return false;
   }
 
@@ -2569,8 +2570,15 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
       Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
       CallInst *CI = dyn_cast<CallInst>(IncomingVal);
       BasicBlock *PredBB = PN->getIncomingBlock(I);
+      // Do we have a call whose return value may have been optimized out (e.g.
+      // __builtin_memset) and an unconditional jump as terminator eligible to
+      // be folded?
+      if (!CI && PredBB && PredBB->getSingleSuccessor() == BB)
+        CI = dyn_cast_or_null<CallInst>(
+            PredBB->getTerminator()->getPrevNonDebugInstruction(true));
+
       // Make sure the phi value is indeed produced by the tail call.
-      if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
+      if (CI && CI->getParent() == PredBB && CI->getNumUses() <= 1 &&
           TLI->mayBeEmittedAsTailCall(CI) &&
           attributesPermitTailCall(F, CI, RetI, *TLI))
         TailCallBBs.push_back(PredBB);
diff --git a/llvm/test/CodeGen/Thumb2/constant-islands-cbz.ll b/llvm/test/CodeGen/Thumb2/constant-islands-cbz.ll
index fc42d4e0e4d8c0..6e5cf4d335d94e 100644
--- a/llvm/test/CodeGen/Thumb2/constant-islands-cbz.ll
+++ b/llvm/test/CodeGen/Thumb2/constant-islands-cbz.ll
@@ -8,12 +8,11 @@ define ptr @test(ptr returned %this, i32 %event_size, ptr %event_pointer) {
 ; CHECK-T1-NEXT:    .save {r4, lr}
 ; CHECK-T1-NEXT:    push {r4, lr}
 ; CHECK-T1-NEXT:    mov r4, r0
-; CHECK-T1-NEXT:    movs r0, #0
-; CHECK-T1-NEXT:    str r0, [r4, #4]
-; CHECK-T1-NEXT:    str r0, [r4, #8]
-; CHECK-T1-NEXT:    str r0, [r4, #12]
-; CHECK-T1-NEXT:    str r0, [r4, #16]
-; CHECK-T1-NEXT:    mov r0, r4
+; CHECK-T1-NEXT:    movs r3, #0
+; CHECK-T1-NEXT:    str r3, [r0, #4]
+; CHECK-T1-NEXT:    str r3, [r0, #8]
+; CHECK-T1-NEXT:    str r3, [r0, #12]
+; CHECK-T1-NEXT:    str r3, [r0, #16]
 ; CHECK-T1-NEXT:    cbz r2, .LBB0_2
 ; CHECK-T1-NEXT:  @ %bb.1: @ %if.else
 ; CHECK-T1-NEXT:    bl equeue_create_inplace
@@ -28,11 +27,10 @@ define ptr @test(ptr returned %this, i32 %event_size, ptr %event_pointer) {
 ; CHECK-T2:       @ %bb.0: @ %entry
 ; CHECK-T2-NEXT:    .save {r4, lr}
 ; CHECK-T2-NEXT:    push {r4, lr}
+; CHECK-T2-NEXT:    movs r3, #0
 ; CHECK-T2-NEXT:    mov r4, r0
-; CHECK-T2-NEXT:    movs r0, #0
-; CHECK-T2-NEXT:    strd r0, r0, [r4, #4]
-; CHECK-T2-NEXT:    strd r0, r0, [r4, #12]
-; CHECK-T2-NEXT:    mov r0, r4
+; CHECK-T2-NEXT:    strd r3, r3, [r0, #4]
+; CHECK-T2-NEXT:    strd r3, r3, [r0, #12]
 ; CHECK-T2-NEXT:    cbz r2, .LBB0_2
 ; CHECK-T2-NEXT:  @ %bb.1: @ %if.else
 ; CHECK-T2-NEXT:    bl equeue_create_inplace
diff --git a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
index 937cc173e7faef..404b8b13328de7 100644
--- a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
+++ b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
@@ -95,102 +95,92 @@ define i32 @loop_shared_header(i8* %exe, i32 %exesz, i32 %headsize, i32 %min, i3
 ; CHECK-NEXT:    pushq %r12
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    movl $1, %ebx
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    jne .LBB1_24
+; CHECK-NEXT:    jne .LBB1_22
 ; CHECK-NEXT:  # %bb.1: # %if.end19
 ; CHECK-NEXT:    movl %esi, %ebp
-; CHECK-NEXT:    movq %rdi, %r15
-; CHECK-NEXT:    movl (%rax), %r13d
-; CHECK-NEXT:    leal (,%r13,4), %ebx
-; CHECK-NEXT:    movl %ebx, %r12d
+; CHECK-NEXT:    movq %rdi, %r14
+; CHECK-NEXT:    movl (%rax), %r12d
+; CHECK-NEXT:    leal (,%r12,4), %r13d
+; CHECK-NEXT:    movl %r13d, %r15d
 ; CHECK-NEXT:    movl $1, %esi
-; CHECK-NEXT:    movq %r12, %rdi
+; CHECK-NEXT:    movq %r15, %rdi
 ; CHECK-NEXT:    callq cli_calloc@PLT
 ; CHECK-NEXT:    testl %ebp, %ebp
-; CHECK-NEXT:    je .LBB1_23
+; CHECK-NEXT:    je .LBB1_22
 ; CHECK-NEXT:  # %bb.2: # %if.end19
-; CHECK-NEXT:    testl %r13d, %r13d
-; CHECK-NEXT:    je .LBB1_23
+; CHECK-NEXT:    testl %r12d, %r12d
+; CHECK-NEXT:    je .LBB1_22
 ; CHECK-NEXT:  # %bb.3: # %if.end19
-; CHECK-NEXT:    movq %rax, %r14
+; CHECK-NEXT:    movq %rax, %rbx
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    jne .LBB1_23
+; CHECK-NEXT:    jne .LBB1_22
 ; CHECK-NEXT:  # %bb.4: # %if.end19
-; CHECK-NEXT:    cmpq %r15, %r14
-; CHECK-NEXT:    jb .LBB1_23
+; CHECK-NEXT:    cmpq %r14, %rbx
+; CHECK-NEXT:    jb .LBB1_22
 ; CHECK-NEXT:  # %bb.5: # %if.end50
-; CHECK-NEXT:    movq %r14, %rdi
-; CHECK-NEXT:    movq %r12, %rdx
+; CHECK-NEXT:    movq %rbx, %rdi
+; CHECK-NEXT:    movq %r15, %rdx
 ; CHECK-NEXT:    callq memcpy@PLT
-; CHECK-NEXT:    cmpl $4, %ebx
-; CHECK-NEXT:    jb .LBB1_26
+; CHECK-NEXT:    cmpl $4, %r13d
+; CHECK-NEXT:    jb .LBB1_21
 ; CHECK-NEXT:  # %bb.6: # %shared_preheader
 ; CHECK-NEXT:    movb $32, %cl
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    jmp .LBB1_8
+; CHECK-NEXT:    jmp .LBB1_7
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB1_7: # %merge_predecessor_split
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:  .LBB1_17: # %merge_predecessor_split
+; CHECK-NEXT:    # in Loop: Header=BB1_7 Depth=1
 ; CHECK-NEXT:    movb $32, %cl
-; CHECK-NEXT:  .LBB1_8: # %outer_loop_header
+; CHECK-NEXT:  .LBB1_7: # %outer_loop_header
 ; CHECK-NEXT:    # =>This Loop Header: Depth=1
-; CHECK-NEXT:    # Child Loop BB1_9 Depth 2
-; CHECK-NEXT:    testl %r13d, %r13d
-; CHECK-NEXT:    je .LBB1_16
+; CHECK-NEXT:    # Child Loop BB1_11 Depth 2
+; CHECK-NEXT:    testl %r12d, %r12d
+; CHECK-NEXT:    je .LBB1_8
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB1_9: # %shared_loop_header
-; CHECK-NEXT:    # Parent Loop BB1_8 Depth=1
+; CHECK-NEXT:  .LBB1_11: # %shared_loop_header
+; CHECK-NEXT:    # Parent Loop BB1_7 Depth=1
 ; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    testq %r14, %r14
-; CHECK-NEXT:    jne .LBB1_25
-; CHECK-NEXT:  # %bb.10: # %inner_loop_body
-; CHECK-NEXT:    # in Loop: Header=BB1_9 Depth=2
+; CHECK-NEXT:    testq %rbx, %rbx
+; CHECK-NEXT:    jne .LBB1_20
+; CHECK-NEXT:  # %bb.12: # %inner_loop_body
+; CHECK-NEXT:    # in Loop: Header=BB1_11 Depth=2
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    je .LBB1_9
-; CHECK-NEXT:  # %bb.11: # %if.end96.i
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
-; CHECK-NEXT:    cmpl $3, %r13d
-; CHECK-NEXT:    jae .LBB1_20
-; CHECK-NEXT:  # %bb.12: # %if.end287.i
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:    je .LBB1_11
+; CHECK-NEXT:  # %bb.13: # %if.end96.i
+; CHECK-NEXT:    # in Loop: Header=BB1_7 Depth=1
+; CHECK-NEXT:    cmpl $3, %r12d
+; CHECK-NEXT:    jae .LBB1_14
+; CHECK-NEXT:  # %bb.15: # %if.end287.i
+; CHECK-NEXT:    # in Loop: Header=BB1_7 Depth=1
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    # implicit-def: $cl
-; CHECK-NEXT:    jne .LBB1_8
-; CHECK-NEXT:  # %bb.13: # %if.end308.i
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:    jne .LBB1_7
+; CHECK-NEXT:  # %bb.16: # %if.end308.i
+; CHECK-NEXT:    # in Loop: Header=BB1_7 Depth=1
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    je .LBB1_7
-; CHECK-NEXT:  # %bb.14: # %if.end335.i
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:    je .LBB1_17
+; CHECK-NEXT:  # %bb.18: # %if.end335.i
+; CHECK-NEXT:    # in Loop: Header=BB1_7 Depth=1
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    testb %cl, %cl
-; CHECK-NEXT:    jne .LBB1_8
-; CHECK-NEXT:  # %bb.15: # %merge_other
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:    jne .LBB1_7
+; CHECK-NEXT:  # %bb.19: # %merge_other
+; CHECK-NEXT:    # in Loop: Header=BB1_7 Depth=1
 ; CHECK-NEXT:    # implicit-def: $cl
-; CHECK-NEXT:    jmp .LBB1_8
-; CHECK-NEXT:  .LBB1_23:
-; CHECK-NEXT:    movl $1, %ebx
-; CHECK-NEXT:    jmp .LBB1_24
-; CHECK-NEXT:  .LBB1_16: # %while.cond.us1412.i
+; CHECK-NEXT:    jmp .LBB1_7
+; CHECK-NEXT:  .LBB1_8: # %while.cond.us1412.i
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    movl $1, %ebx
-; CHECK-NEXT:    jne .LBB1_18
-; CHECK-NEXT:  # %bb.17: # %while.cond.us1412.i
+; CHECK-NEXT:    jne .LBB1_10
+; CHECK-NEXT:  # %bb.9: # %while.cond.us1412.i
 ; CHECK-NEXT:    decb %cl
-; CHECK-NEXT:    jne .LBB1_24
-; CHECK-NEXT:  .LBB1_18: # %if.end41.us1436.i
-; CHECK-NEXT:  .LBB1_20: # %if.then99.i
-; CHECK-NEXT:    movq .str.6@GOTPCREL(%rip), %rdi
-; CHECK-NEXT:    xorl %ebx, %ebx
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    callq cli_dbgmsg@PLT
-; CHECK-NEXT:  .LBB1_24: # %cleanup
-; CHECK-NEXT:    movl %ebx, %eax
+; CHECK-NEXT:    je .LBB1_10
+; CHECK-NEXT:  .LBB1_22: # %cleanup
+; CHECK-NEXT:    movl $1, %eax
+; CHECK-NEXT:  .LBB1_23: # %cleanup
 ; CHECK-NEXT:    addq $8, %rsp
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r12
@@ -199,8 +189,15 @@ define i32 @loop_shared_header(i8* %exe, i32 %exesz, i32 %headsize, i32 %min, i3
 ; CHECK-NEXT:    popq %r15
 ; CHECK-NEXT:    popq %rbp
 ; CHECK-NEXT:    retq
-; CHECK-NEXT:  .LBB1_25: # %wunpsect.exit.thread.loopexit389
-; CHECK-NEXT:  .LBB1_26: # %wunpsect.exit.thread.loopexit391
+; CHECK-NEXT:  .LBB1_14: # %if.then99.i
+; CHECK-NEXT:    movq .str.6@GOTPCREL(%rip), %rdi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    callq cli_dbgmsg@PLT
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    jmp .LBB1_23
+; CHECK-NEXT:  .LBB1_20: # %wunpsect.exit.thread.loopexit389
+; CHECK-NEXT:  .LBB1_10: # %if.end41.us1436.i
+; CHECK-NEXT:  .LBB1_21: # %wunpsect.exit.thread.loopexit391
 entry:
   %0 = load i32, i32* undef, align 4
   %mul = shl nsw i32 %0, 2
diff --git a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
index af2fec4b5dffcf..971c3faefdd3cb 100644
--- a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
+++ b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll
@@ -188,20 +188,14 @@ return:
 define noundef ptr @memset_tailc(ptr noundef %0, i64 noundef %1) {
 ; CHECK-LABEL: memset_tailc:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset %rbx, -16
-; CHECK-NEXT:    movq %rdi, %rbx
 ; CHECK-NEXT:    testq %rdi, %rdi
-; CHECK-NEXT:    je LBB4_2
-; CHECK-NEXT:  ## %bb.1:
+; CHECK-NEXT:    je LBB4_1
+; CHECK-NEXT:  ## %bb.2:
 ; CHECK-NEXT:    movq %rsi, %rdx
-; CHECK-NEXT:    movq %rbx, %rdi
 ; CHECK-NEXT:    xorl %esi, %esi
-; CHECK-NEXT:    callq _memset
-; CHECK-NEXT:  LBB4_2:
-; CHECK-NEXT:    movq %rbx, %rax
-; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    jmp _memset ## TAILCALL
+; CHECK-NEXT:  LBB4_1:
+; CHECK-NEXT:    movq %rdi, %rax
 ; CHECK-NEXT:    retq
   %3 = icmp eq ptr %0, null
   br i1 %3, label %5, label %4
@@ -217,23 +211,15 @@ define noundef ptr @memset_tailc(ptr noundef %0, i64 noundef %1) {
 define noundef ptr @memcpy_tailc(ptr noundef %0, i64 noundef %1, ptr noundef %2) {
 ; CHECK-LABEL: memcpy_tailc:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset %rbx, -16
 ; CHECK-NEXT:    testq %rsi, %rsi
 ; CHECK-NEXT:    je LBB5_1
 ; CHECK-NEXT:  ## %bb.2:
 ; CHECK-NEXT:    movq %rsi, %rax
-; CHECK-NEXT:    movq %rdi, %rbx
 ; CHECK-NEXT:    movq %rdx, %rsi
 ; CHECK-NEXT:    movq %rax, %rdx
-; CHECK-NEXT:    callq _memcpy
-; CHECK-NEXT:    jmp LBB5_3
+; CHECK-NEXT:    jmp _memcpy ## TAILCALL
 ; CHECK-NEXT:  LBB5_1:
-; CHECK-NEXT:    movq %rdx, %rbx
-; CHECK-NEXT:  LBB5_3:
-; CHECK-NEXT:    movq %rbx, %rax
-; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    movq %rdx, %rax
 ; CHECK-NEXT:    retq
   %4 = icmp eq i64 %1, 0
   br i1 %4, label %6, label %5
diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
index fab3847b3a2c51..eb2e00fb90429c 100644
--- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
@@ -477,8 +477,8 @@ define i32 @inlineAsm(i32 %cond, i32 %N) {
 ; ENABLE-NEXT:    popq %rbx
 ; ENABLE-NEXT:    retq
 ; ENABLE-NEXT:  LBB6_4: ## %if.else
+; ENABLE-NEXT:    addl %esi, %esi
 ; ENABLE-NEXT:    movl %esi, %eax
-; ENABLE-NEXT:    addl %esi, %eax
 ; ENABLE-NEXT:    retq
 ;
 ; DISABLE-LABEL: inlineAsm:
@@ -509,8 +509,8 @@ define i32 @inlineAsm(i32 %cond, i32 %N) {
 ; DISABLE-NEXT:    popq %rbx
 ; DISABLE-NEXT:    retq
 ; DISABLE-NEXT:  LBB6_4: ## %if.else
+; DISABLE-NEXT:    addl %esi, %esi
 ; DISABLE-NEXT:    movl %esi, %eax
-; DISABLE-NEXT:    addl %esi, %eax
 ; DISABLE-NEXT:    popq %rbx
 ; DISABLE-NEXT:    retq
 entry:
diff --git a/llvm/test/DebugInfo/X86/live-debug-values-expr-conflict.ll b/llvm/test/DebugInfo/X86/live-debug-values-expr-conflict.ll
index afce27ee4abb9b..2769d39fa54aa2 100644
--- a/llvm/test/DebugInfo/X86/live-debug-values-expr-conflict.ll
+++ b/llvm/test/DebugInfo/X86/live-debug-values-expr-conflict.ll
@@ -24,18 +24,18 @@
 ; one in the block two, and none in block three.
 ; CHECK:       ![[BAZVAR:[0-9]+]] = !DILocalVariable(name: "baz",
 ; CHECK-LABEL: bb.0.entry:
-; CHECK:       DBG_VALUE {{[0-9a-zA-Z$%_]*}}, $noreg, ![[BAZVAR]], 
+; CHECK:       DBG_VALUE {{[0-9a-zA-Z$%_]*}}, $noreg, ![[BAZVAR]], 
 ; CHECK-SAME:     !DIExpression()
 ; CHECK-LABEL: bb.1.if.then:
-; CHECK-LABEL: bb.2.if.else:
-; CHECK:       DBG_VALUE {{[0-9a-zA-Z$%_]*}}, $noreg, ![[BAZVAR]], 
+; CHECK-LABEL: bb.3.if.else:
+; CHECK:       DBG_VALUE {{[0-9a-zA-Z$%_]*}}, $noreg, ![[BAZVAR]], 
 ; CHECK-SAME:     !DIExpression()
-; CHECK:       DBG_VALUE_LIST ![[BAZVAR]], 
-; CHECK-SAME:     !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_plus_uconst, 1, DW_OP_stack_value)
-; CHECK-SAME:     {{[0-9a-zA-Z$%_]*}}
-; CHECK-LABEL: bb.3.if.end:
+; CHECK:       DBG_VALUE_LIST ![[BAZVAR]], 
+; CHECK-SAME:     !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_plus_uconst, 1, DW_OP_stack_value)
+; CHECK-SAME:     {{[0-9a-zA-Z$%_]*}}
+; CHECK-LABEL: bb.2.if.then:
 ; CHECK-NOT:   DBG_VALUE
-; CHECK-NOT:   DBG_VALUE_LIST
+; CHECK-NOT:   DBG_VALUE_LIST
 
 declare void @escape1(i32)
 declare void @escape2(i32)



More information about the llvm-commits mailing list