[llvm] [TailDuplicator] Add a limit on the size of predecessors (PR #78582)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 18 06:00:43 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Quentin Dian (DianQK)

<details>
<summary>Changes</summary>

Fixes #<!-- -->78578.

We should add a count check to the predecessors to avoid the code size explosion.

I found a strange argument during my investigation.

https://github.com/llvm/llvm-project/blob/4b2381a5f05dcd576e50615ec1d9661fbae3282c/llvm/lib/CodeGen/TailDuplicator.cpp#L76-L77

We didn't use `-tail-dup-limit` while setting a meaningless value.

Also, it may be that an issue with AsmPrinter is causing this use case to print two line breaks. This makes the test case fail. I haven't checked, but I don't think it at least affects the review.

---

Patch is 29.43 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/78582.diff


3 Files Affected:

- (modified) llvm/lib/CodeGen/TailDuplicator.cpp (+7) 
- (modified) llvm/test/CodeGen/X86/mul-constant-result.ll (+135-188) 
- (added) llvm/test/CodeGen/X86/tail-dup-pred-size-limit.ll (+242) 


``````````diff
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index 5ed67bd0a121ed..e76d63d3c0d66f 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -76,6 +76,11 @@ static cl::opt<bool>
 static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
                                       cl::Hidden);
 
+static cl::opt<unsigned> TailDupPredSizeLimit(
+    "tail-dup-pred-size-limit",
+    cl::desc("Maximum predecessors to consider tail duplicating."), cl::init(8),
+    cl::Hidden);
+
 void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc,
                             const MachineBranchProbabilityInfo *MBPIin,
                             MBFIWrapper *MBFIin,
@@ -565,6 +570,8 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
   if (TailBB.isSuccessor(&TailBB))
     return false;
 
+  if (TailDupPredSizeLimit < TailBB.pred_size())
+    return false;
   // Set the limit on the cost to duplicate. When optimizing for size,
   // duplicate only one, because one branch instruction can be eliminated to
   // compensate for the duplication.
diff --git a/llvm/test/CodeGen/X86/mul-constant-result.ll b/llvm/test/CodeGen/X86/mul-constant-result.ll
index 1f9e7a93ad0b90..73c764a3f53da1 100644
--- a/llvm/test/CodeGen/X86/mul-constant-result.ll
+++ b/llvm/test/CodeGen/X86/mul-constant-result.ll
@@ -28,162 +28,132 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 {
 ; X86-NEXT:  .LBB0_4:
 ; X86-NEXT:    decl %ecx
 ; X86-NEXT:    cmpl $31, %ecx
-; X86-NEXT:    ja .LBB0_35
+; X86-NEXT:    ja .LBB0_31
 ; X86-NEXT:  # %bb.5:
 ; X86-NEXT:    jmpl *.LJTI0_0(,%ecx,4)
 ; X86-NEXT:  .LBB0_6:
 ; X86-NEXT:    addl %eax, %eax
-; X86-NEXT:    popl %esi
-; X86-NEXT:    .cfi_def_cfa_offset 4
-; X86-NEXT:    retl
+; X86-NEXT:    jmp .LBB0_40
 ; X86-NEXT:  .LBB0_7:
-; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    leal (%eax,%eax,8), %ecx
 ; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
-; X86-NEXT:    jmp .LBB0_9
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    jmp .LBB0_40
 ; X86-NEXT:  .LBB0_8:
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    shll $4, %ecx
-; X86-NEXT:    jmp .LBB0_9
-; X86-NEXT:  .LBB0_10:
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    jmp .LBB0_40
+; X86-NEXT:  .LBB0_9:
 ; X86-NEXT:    leal (%eax,%eax,4), %eax
-; X86-NEXT:    jmp .LBB0_18
-; X86-NEXT:  .LBB0_11:
+; X86-NEXT:    jmp .LBB0_39
+; X86-NEXT:  .LBB0_10:
 ; X86-NEXT:    shll $2, %eax
-; X86-NEXT:    jmp .LBB0_18
-; X86-NEXT:  .LBB0_13:
+; X86-NEXT:    jmp .LBB0_39
+; X86-NEXT:  .LBB0_11:
+; X86-NEXT:    leal (%eax,%eax,4), %eax
+; X86-NEXT:    jmp .LBB0_40
+; X86-NEXT:  .LBB0_12:
 ; X86-NEXT:    leal (%eax,%eax,2), %ecx
-; X86-NEXT:    jmp .LBB0_14
-; X86-NEXT:  .LBB0_15:
+; X86-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NEXT:    jmp .LBB0_40
+; X86-NEXT:  .LBB0_13:
 ; X86-NEXT:    addl %eax, %eax
-; X86-NEXT:    jmp .LBB0_12
-; X86-NEXT:  .LBB0_16:
+; X86-NEXT:    leal (%eax,%eax,4), %eax
+; X86-NEXT:    jmp .LBB0_40
+; X86-NEXT:  .LBB0_14:
 ; X86-NEXT:    leal (%eax,%eax,4), %ecx
 ; X86-NEXT:    leal (%ecx,%ecx,4), %ecx
-; X86-NEXT:    jmp .LBB0_9
-; X86-NEXT:  .LBB0_17:
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    jmp .LBB0_40
+; X86-NEXT:  .LBB0_15:
 ; X86-NEXT:    leal (%eax,%eax,4), %eax
-; X86-NEXT:    jmp .LBB0_12
-; X86-NEXT:  .LBB0_19:
+; X86-NEXT:    leal (%eax,%eax,4), %eax
+; X86-NEXT:    jmp .LBB0_40
+; X86-NEXT:  .LBB0_17:
 ; X86-NEXT:    shll $4, %eax
-; X86-NEXT:    popl %esi
-; X86-NEXT:    .cfi_def_cfa_offset 4
-; X86-NEXT:    retl
-; X86-NEXT:  .LBB0_20:
-; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    jmp .LBB0_40
+; X86-NEXT:  .LBB0_18:
 ; X86-NEXT:    shll $2, %eax
-; X86-NEXT:    popl %esi
-; X86-NEXT:    .cfi_def_cfa_offset 4
-; X86-NEXT:    retl
-; X86-NEXT:  .LBB0_21:
-; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    jmp .LBB0_40
+; X86-NEXT:  .LBB0_19:
 ; X86-NEXT:    shll $3, %eax
-; X86-NEXT:    popl %esi
-; X86-NEXT:    .cfi_def_cfa_offset 4
-; X86-NEXT:    retl
-; X86-NEXT:  .LBB0_22:
-; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    jmp .LBB0_40
+; X86-NEXT:  .LBB0_20:
 ; X86-NEXT:    shll $5, %eax
-; X86-NEXT:    popl %esi
-; X86-NEXT:    .cfi_def_cfa_offset 4
-; X86-NEXT:    retl
-; X86-NEXT:  .LBB0_23:
-; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    jmp .LBB0_40
+; X86-NEXT:  .LBB0_21:
 ; X86-NEXT:    addl %eax, %eax
-; X86-NEXT:  .LBB0_33:
 ; X86-NEXT:    leal (%eax,%eax,8), %eax
-; X86-NEXT:    popl %esi
-; X86-NEXT:    .cfi_def_cfa_offset 4
-; X86-NEXT:    retl
-; X86-NEXT:  .LBB0_24:
-; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    jmp .LBB0_40
+; X86-NEXT:  .LBB0_22:
 ; X86-NEXT:    leal (%eax,%eax,4), %ecx
-; X86-NEXT:  .LBB0_14:
 ; X86-NEXT:    leal (%eax,%ecx,4), %eax
-; X86-NEXT:    popl %esi
-; X86-NEXT:    .cfi_def_cfa_offset 4
-; X86-NEXT:    retl
-; X86-NEXT:  .LBB0_25:
-; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    jmp .LBB0_40
+; X86-NEXT:  .LBB0_23:
 ; X86-NEXT:    addl %eax, %eax
-; X86-NEXT:    jmp .LBB0_18
-; X86-NEXT:  .LBB0_26:
+; X86-NEXT:    jmp .LBB0_39
+; X86-NEXT:  .LBB0_24:
 ; X86-NEXT:    leal (%eax,%eax,4), %ecx
 ; X86-NEXT:    leal (%eax,%ecx,4), %ecx
-; X86-NEXT:    jmp .LBB0_9
-; X86-NEXT:  .LBB0_27:
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    jmp .LBB0_40
+; X86-NEXT:  .LBB0_25:
 ; X86-NEXT:    leal (%eax,%eax), %ecx
 ; X86-NEXT:    shll $4, %eax
-; X86-NEXT:    jmp .LBB0_28
-; X86-NEXT:  .LBB0_29:
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    jmp .LBB0_40
+; X86-NEXT:  .LBB0_26:
 ; X86-NEXT:    leal (,%eax,8), %ecx
-; X86-NEXT:    jmp .LBB0_38
-; X86-NEXT:  .LBB0_30:
+; X86-NEXT:    jmp .LBB0_33
+; X86-NEXT:  .LBB0_27:
 ; X86-NEXT:    leal (%eax,%eax,8), %ecx
-; X86-NEXT:    jmp .LBB0_32
-; X86-NEXT:  .LBB0_31:
+; X86-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NEXT:    jmp .LBB0_40
+; X86-NEXT:  .LBB0_28:
 ; X86-NEXT:    leal (%eax,%eax,4), %ecx
-; X86-NEXT:  .LBB0_32:
 ; X86-NEXT:    leal (%eax,%ecx,2), %eax
-; X86-NEXT:    popl %esi
-; X86-NEXT:    .cfi_def_cfa_offset 4
-; X86-NEXT:    retl
-; X86-NEXT:  .LBB0_34:
-; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    jmp .LBB0_40
+; X86-NEXT:  .LBB0_29:
+; X86-NEXT:    leal (%eax,%eax,8), %eax
+; X86-NEXT:    jmp .LBB0_40
+; X86-NEXT:  .LBB0_30:
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    shll $5, %ecx
-; X86-NEXT:    jmp .LBB0_38
-; X86-NEXT:  .LBB0_35:
+; X86-NEXT:    jmp .LBB0_33
+; X86-NEXT:  .LBB0_31:
 ; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:  .LBB0_36:
-; X86-NEXT:    popl %esi
-; X86-NEXT:    .cfi_def_cfa_offset 4
-; X86-NEXT:    retl
-; X86-NEXT:  .LBB0_37:
-; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    jmp .LBB0_40
+; X86-NEXT:  .LBB0_32:
 ; X86-NEXT:    leal (%eax,%eax,2), %ecx
 ; X86-NEXT:    shll $3, %ecx
-; X86-NEXT:  .LBB0_38:
+; X86-NEXT:  .LBB0_33:
 ; X86-NEXT:    subl %eax, %ecx
 ; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    popl %esi
-; X86-NEXT:    .cfi_def_cfa_offset 4
-; X86-NEXT:    retl
-; X86-NEXT:  .LBB0_39:
-; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    jmp .LBB0_40
+; X86-NEXT:  .LBB0_34:
 ; X86-NEXT:    shll $2, %eax
-; X86-NEXT:  .LBB0_12:
 ; X86-NEXT:    leal (%eax,%eax,4), %eax
-; X86-NEXT:    popl %esi
-; X86-NEXT:    .cfi_def_cfa_offset 4
-; X86-NEXT:    retl
-; X86-NEXT:  .LBB0_40:
-; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    jmp .LBB0_40
+; X86-NEXT:  .LBB0_35:
 ; X86-NEXT:    shll $3, %eax
-; X86-NEXT:    jmp .LBB0_18
-; X86-NEXT:  .LBB0_41:
+; X86-NEXT:    jmp .LBB0_39
+; X86-NEXT:  .LBB0_36:
 ; X86-NEXT:    leal (%eax,%eax,8), %ecx
 ; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
 ; X86-NEXT:    addl %eax, %eax
-; X86-NEXT:  .LBB0_9:
 ; X86-NEXT:    addl %ecx, %eax
-; X86-NEXT:    popl %esi
-; X86-NEXT:    .cfi_def_cfa_offset 4
-; X86-NEXT:    retl
-; X86-NEXT:  .LBB0_42:
-; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    jmp .LBB0_40
+; X86-NEXT:  .LBB0_37:
 ; X86-NEXT:    leal (%eax,%eax), %ecx
 ; X86-NEXT:    shll $5, %eax
-; X86-NEXT:  .LBB0_28:
 ; X86-NEXT:    subl %ecx, %eax
-; X86-NEXT:    popl %esi
-; X86-NEXT:    .cfi_def_cfa_offset 4
-; X86-NEXT:    retl
-; X86-NEXT:  .LBB0_43:
-; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    jmp .LBB0_40
+; X86-NEXT:  .LBB0_38:
 ; X86-NEXT:    leal (%eax,%eax,8), %eax
-; X86-NEXT:  .LBB0_18:
+; X86-NEXT:  .LBB0_39:
 ; X86-NEXT:    leal (%eax,%eax,2), %eax
+; X86-NEXT:  .LBB0_40:
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
@@ -199,154 +169,131 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 {
 ; X64-HSW-NEXT:    cmovel %ecx, %eax
 ; X64-HSW-NEXT:    decl %edi
 ; X64-HSW-NEXT:    cmpl $31, %edi
-; X64-HSW-NEXT:    ja .LBB0_31
+; X64-HSW-NEXT:    ja .LBB0_28
 ; X64-HSW-NEXT:  # %bb.1:
 ; X64-HSW-NEXT:    jmpq *.LJTI0_0(,%rdi,8)
 ; X64-HSW-NEXT:  .LBB0_2:
 ; X64-HSW-NEXT:    addl %eax, %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:    jmp .LBB0_37
 ; X64-HSW-NEXT:  .LBB0_3:
 ; X64-HSW-NEXT:    leal (%rax,%rax,8), %ecx
 ; X64-HSW-NEXT:    leal (%rcx,%rcx,2), %ecx
-; X64-HSW-NEXT:    jmp .LBB0_22
+; X64-HSW-NEXT:    jmp .LBB0_21
 ; X64-HSW-NEXT:  .LBB0_4:
 ; X64-HSW-NEXT:    movl %eax, %ecx
 ; X64-HSW-NEXT:    shll $4, %ecx
-; X64-HSW-NEXT:    jmp .LBB0_22
+; X64-HSW-NEXT:    jmp .LBB0_21
 ; X64-HSW-NEXT:  .LBB0_5:
 ; X64-HSW-NEXT:    leal (%rax,%rax,4), %eax
-; X64-HSW-NEXT:  .LBB0_13:
-; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:    jmp .LBB0_36
 ; X64-HSW-NEXT:  .LBB0_6:
 ; X64-HSW-NEXT:    shll $2, %eax
-; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:    jmp .LBB0_36
+; X64-HSW-NEXT:  .LBB0_7:
+; X64-HSW-NEXT:    leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT:    jmp .LBB0_37
 ; X64-HSW-NEXT:  .LBB0_8:
 ; X64-HSW-NEXT:    leal (%rax,%rax,2), %ecx
 ; X64-HSW-NEXT:    leal (%rax,%rcx,4), %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_10:
+; X64-HSW-NEXT:    jmp .LBB0_37
+; X64-HSW-NEXT:  .LBB0_9:
 ; X64-HSW-NEXT:    addl %eax, %eax
-; X64-HSW-NEXT:  .LBB0_7:
 ; X64-HSW-NEXT:    leal (%rax,%rax,4), %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_11:
+; X64-HSW-NEXT:    jmp .LBB0_37
+; X64-HSW-NEXT:  .LBB0_10:
 ; X64-HSW-NEXT:    leal (%rax,%rax,4), %ecx
 ; X64-HSW-NEXT:    leal (%rcx,%rcx,4), %ecx
-; X64-HSW-NEXT:    jmp .LBB0_22
-; X64-HSW-NEXT:  .LBB0_12:
+; X64-HSW-NEXT:    jmp .LBB0_21
+; X64-HSW-NEXT:  .LBB0_11:
 ; X64-HSW-NEXT:    leal (%rax,%rax,4), %eax
 ; X64-HSW-NEXT:    leal (%rax,%rax,4), %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_14:
+; X64-HSW-NEXT:    jmp .LBB0_37
+; X64-HSW-NEXT:  .LBB0_13:
 ; X64-HSW-NEXT:    shll $4, %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_15:
+; X64-HSW-NEXT:    jmp .LBB0_37
+; X64-HSW-NEXT:  .LBB0_14:
 ; X64-HSW-NEXT:    shll $2, %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_16:
+; X64-HSW-NEXT:    jmp .LBB0_37
+; X64-HSW-NEXT:  .LBB0_15:
 ; X64-HSW-NEXT:    shll $3, %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_17:
+; X64-HSW-NEXT:    jmp .LBB0_37
+; X64-HSW-NEXT:  .LBB0_16:
 ; X64-HSW-NEXT:    shll $5, %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_18:
+; X64-HSW-NEXT:    jmp .LBB0_37
+; X64-HSW-NEXT:  .LBB0_17:
 ; X64-HSW-NEXT:    addl %eax, %eax
-; X64-HSW-NEXT:  .LBB0_29:
 ; X64-HSW-NEXT:    leal (%rax,%rax,8), %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_19:
+; X64-HSW-NEXT:    jmp .LBB0_37
+; X64-HSW-NEXT:  .LBB0_18:
 ; X64-HSW-NEXT:    leal (%rax,%rax,4), %ecx
 ; X64-HSW-NEXT:    leal (%rax,%rcx,4), %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_20:
+; X64-HSW-NEXT:    jmp .LBB0_37
+; X64-HSW-NEXT:  .LBB0_19:
 ; X64-HSW-NEXT:    addl %eax, %eax
-; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_21:
+; X64-HSW-NEXT:    jmp .LBB0_36
+; X64-HSW-NEXT:  .LBB0_20:
 ; X64-HSW-NEXT:    leal (%rax,%rax,4), %ecx
 ; X64-HSW-NEXT:    leal (%rax,%rcx,4), %ecx
-; X64-HSW-NEXT:  .LBB0_22:
+; X64-HSW-NEXT:  .LBB0_21:
 ; X64-HSW-NEXT:    addl %eax, %ecx
 ; X64-HSW-NEXT:    movl %ecx, %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_23:
+; X64-HSW-NEXT:    jmp .LBB0_37
+; X64-HSW-NEXT:  .LBB0_22:
 ; X64-HSW-NEXT:    leal (%rax,%rax), %ecx
 ; X64-HSW-NEXT:    shll $4, %eax
 ; X64-HSW-NEXT:    subl %ecx, %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_25:
+; X64-HSW-NEXT:    jmp .LBB0_37
+; X64-HSW-NEXT:  .LBB0_23:
 ; X64-HSW-NEXT:    leal (,%rax,8), %ecx
-; X64-HSW-NEXT:    jmp .LBB0_34
-; X64-HSW-NEXT:  .LBB0_26:
+; X64-HSW-NEXT:    jmp .LBB0_30
+; X64-HSW-NEXT:  .LBB0_24:
 ; X64-HSW-NEXT:    leal (%rax,%rax,8), %ecx
 ; X64-HSW-NEXT:    leal (%rax,%rcx,2), %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_27:
+; X64-HSW-NEXT:    jmp .LBB0_37
+; X64-HSW-NEXT:  .LBB0_25:
 ; X64-HSW-NEXT:    leal (%rax,%rax,4), %ecx
 ; X64-HSW-NEXT:    leal (%rax,%rcx,2), %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_30:
+; X64-HSW-NEXT:    jmp .LBB0_37
+; X64-HSW-NEXT:  .LBB0_26:
+; X64-HSW-NEXT:    leal (%rax,%rax,8), %eax
+; X64-HSW-NEXT:    jmp .LBB0_37
+; X64-HSW-NEXT:  .LBB0_27:
 ; X64-HSW-NEXT:    movl %eax, %ecx
 ; X64-HSW-NEXT:    shll $5, %ecx
-; X64-HSW-NEXT:    jmp .LBB0_34
-; X64-HSW-NEXT:  .LBB0_31:
+; X64-HSW-NEXT:    jmp .LBB0_30
+; X64-HSW-NEXT:  .LBB0_28:
 ; X64-HSW-NEXT:    xorl %eax, %eax
-; X64-HSW-NEXT:  .LBB0_32:
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_33:
+; X64-HSW-NEXT:    jmp .LBB0_37
+; X64-HSW-NEXT:  .LBB0_29:
 ; X64-HSW-NEXT:    leal (%rax,%rax,2), %ecx
 ; X64-HSW-NEXT:    shll $3, %ecx
-; X64-HSW-NEXT:  .LBB0_34:
+; X64-HSW-NEXT:  .LBB0_30:
 ; X64-HSW-NEXT:    subl %eax, %ecx
 ; X64-HSW-NEXT:    movl %ecx, %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_36:
+; X64-HSW-NEXT:    jmp .LBB0_37
+; X64-HSW-NEXT:  .LBB0_31:
 ; X64-HSW-NEXT:    shll $2, %eax
 ; X64-HSW-NEXT:    leal (%rax,%rax,4), %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_37:
+; X64-HSW-NEXT:    jmp .LBB0_37
+; X64-HSW-NEXT:  .LBB0_32:
 ; X64-HSW-NEXT:    shll $3, %eax
-; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_38:
+; X64-HSW-NEXT:    jmp .LBB0_36
+; X64-HSW-NEXT:  .LBB0_33:
 ; X64-HSW-NEXT:    leal (%rax,%rax,8), %ecx
 ; X64-HSW-NEXT:    leal (%rcx,%rcx,2), %ecx
 ; X64-HSW-NEXT:    addl %eax, %eax
 ; X64-HSW-NEXT:    addl %ecx, %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_39:
+; X64-HSW-NEXT:    jmp .LBB0_37
+; X64-HSW-NEXT:  .LBB0_34:
 ; X64-HSW-NEXT:    leal (%rax,%rax), %ecx
 ; X64-HSW-NEXT:    shll $5, %eax
 ; X64-HSW-NEXT:    subl %ecx, %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_40:
+; X64-HSW-NEXT:    jmp .LBB0_37
+; X64-HSW-NEXT:  .LBB0_35:
 ; X64-HSW-NEXT:    leal (%rax,%rax,8), %eax
+; X64-HSW-NEXT:  .LBB0_36:
 ; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT:  .LBB0_37:
 ; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT:    retq
   %3 = icmp eq i32 %1, 0
diff --git a/llvm/test/CodeGen/X86/tail-dup-pred-size-limit.ll b/llvm/test/CodeGen/X86/tail-dup-pred-size-limit.ll
new file mode 100644
index 00000000000000..47b9fcaa7d6c85
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tail-dup-pred-size-limit.ll
@@ -0,0 +1,242 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -stop-after=early-tailduplication -tail-dup-pred-size-limit=3 < %s | FileCheck %s -check-prefix=LIMIT
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -stop-after=early-tailduplication -tail-dup-pred-size-limit=4 < %s | FileCheck %s -check-prefix=NOLIMIT
+
+define i32 @foo(ptr %0, i32 %1) {
+  ; LIMIT-LABEL: name: foo
+  ; LIMIT: bb.0 (%ir-block.2):
+  ; LIMIT-NEXT:   successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
+  ; LIMIT-NEXT:   liveins: $rdi, $esi
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT:   [[COPY:%[0-9]+]]:gr32 = COPY $esi
+  ; LIMIT-NEXT:   [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
+  ; LIMIT-NEXT:   [[SHR32ri:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 1, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[SHR32ri]], 7, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[SUBREG_TO_REG:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri]], %subreg.sub_32bit
+  ; LIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg :: (load (s64) from jump-table)
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT: bb.1 (%ir-block.5):
+  ; LIMIT-NEXT:   successors: %bb.6(0x80000000)
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT:   [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; LIMIT-NEXT:   JMP_1 %bb.6
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT: bb.2 (%ir-block.7):
+  ; LIMIT-NEXT:   successors: %bb.6(0x80000000)
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT:   [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; LIMIT-NEXT:   [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags
+  ; LIMIT-NEXT:   JMP_1 %bb.6
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT: bb.3 (%ir-block.10):
+  ; LIMIT-NEXT:   successors: %bb.6(0x80000000)
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT:   [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; LIMIT-NEXT:   [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags
+  ; LIMIT-NEXT:   JMP_1 %bb.6
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT: bb.4 (%ir-block.13):
+  ; LIMIT-NEXT:   successors: %bb.6(0x80000000)
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT:   [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; LIMIT-NEXT:   [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags
+  ; LIMIT-NEXT:   JMP_1 %bb.6
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT: bb.5.default.unreachable2:
+  ; LIMIT-NEXT:   successors:
+  ; LIMIT: bb.6 (%ir-block.16):
+  ; LIMIT-NEXT:   successors: %bb.7(0x20000000), %bb.8(0x20000000), %bb.9(0x20000000), %bb.10(0x20000000)
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT:   [[PHI:%[0-9]+]]:gr32 = PHI [[SHR32ri3]], %bb.4, [[SHR32ri2]], %bb.3, [[SHR32ri1]], %bb.2, [[MOV32rm]], %bb.1
+  ; LIMIT-NEXT:   [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri4]], 7, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri1]], %subreg.sub_32bit
+  ; LIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg :: (load (s64) from jump-table)
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT: bb.7 (%ir-block.20):
+  ; LIMIT-NEXT:   successors: %bb.11(0x80000000)
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT:   [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; LIMIT-NEXT:   JMP_1 %bb.11
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT: bb.8 (%ir-block.22):
+  ; LIMIT-NEXT:   successors: %bb.11(0x80000000)
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT:   [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; LIMIT-NEXT:   [[SHR32r...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/78582


More information about the llvm-commits mailing list