[llvm] 17eb6b6 - Revert "[Taildup] Don't tail-duplicate loop header with multiple successors as its latches"

Wed Nov 24 00:25:03 PST 2021

Reminder to *please* mention why something is being reverted...

On Wed, Nov 24, 2021 at 5:26 AM Jun Ma via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
>
>
> Author: Jun Ma
> Date: 2021-11-24T10:26:37+08:00
> New Revision: 17eb6b61de4b4d7a35680254a59118a0e3fa8dc9
>
> URL: https://github.com/llvm/llvm-project/commit/17eb6b61de4b4d7a35680254a59118a0e3fa8dc9
> DIFF: https://github.com/llvm/llvm-project/commit/17eb6b61de4b4d7a35680254a59118a0e3fa8dc9.diff
>
> LOG: Revert "[Taildup] Don't tail-duplicate loop header with multiple successors as its latches"
>
> This reverts commit 1f9fa549841a2ec55aa5a131bfaf83f0383c4713.
>
> Added:
>
>
> Modified:
>     llvm/lib/CodeGen/TailDuplicator.cpp
>     llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll
>
> Removed:
>
>
>
> ################################################################################
> diff  --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
> index 943bd18c6c8b0..54fc6ee45d00d 100644
> --- a/llvm/lib/CodeGen/TailDuplicator.cpp
> +++ b/llvm/lib/CodeGen/TailDuplicator.cpp
> @@ -70,12 +70,6 @@ static cl::opt<unsigned> TailDupIndirectBranchSize(
>               "end with indirect branches."), cl::init(20),
>      cl::Hidden);
>
> -static cl::opt<unsigned> TailDupJmpTableLoopSize(
> -    "tail-dup-jmptable-loop-size",
> -    cl::desc("Maximum loop latches to consider tail duplication that are "
> -             "successors of loop header."),
> -    cl::init(128), cl::Hidden);
> -
>  static cl::opt<bool>
>      TailDupVerify("tail-dup-verify",
>                    cl::desc("Verify sanity of PHI instructions during taildup"),
> @@ -569,29 +563,6 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
>    if (TailBB.isSuccessor(&TailBB))
>      return false;
>
> -  // When doing tail-duplication with jumptable loops like:
> -  //    1 -> 2 <-> 3                 |
> -  //          \  <-> 4               |
> -  //           \   <-> 5             |
> -  //            \    <-> ...         |
> -  //             \---> rest          |
> -  // quadratic number of edges and much more loops are added to CFG. This
> -  // may cause compile time regression when jumptable is quiet large.
> -  // So set the limit on jumptable cases.
> -  auto isLargeJumpTableLoop = [](const MachineBasicBlock &TailBB) {
> -    const SmallPtrSet<const MachineBasicBlock *, 8> Preds(TailBB.pred_begin(),
> -                                                          TailBB.pred_end());
> -    // Check the basic block has large number of successors, all of them only
> -    // have one successor which is the basic block itself.
> -    return llvm::count_if(
> -               TailBB.successors(), [&](const MachineBasicBlock *SuccBB) {
> -                 return Preds.count(SuccBB) && SuccBB->succ_size() == 1;
> -               }) > TailDupJmpTableLoopSize;
> -  };
> -
> -  if (isLargeJumpTableLoop(TailBB))
> -    return false;
> -
>    // Set the limit on the cost to duplicate. When optimizing for size,
>    // duplicate only one, because one branch instruction can be eliminated to
>    // compensate for the duplication.
>
> diff  --git a/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll b/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll
> index 25da377ec487b..2032c7244331c 100644
> --- a/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll
> +++ b/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll
> @@ -1,48 +1,76 @@
>  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
> -; RUN: llc < %s -tail-dup-jmptable-loop-size=5 -mtriple=x86_64-unknown-linux-gnu  | FileCheck %s
> +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
>  define i8* @large_loop_switch(i8* %p) {
>  ; CHECK-LABEL: large_loop_switch:
>  ; CHECK:       # %bb.0: # %entry
>  ; CHECK-NEXT:    pushq %rbx
>  ; CHECK-NEXT:    .cfi_def_cfa_offset 16
>  ; CHECK-NEXT:    .cfi_offset %rbx, -16
> -; CHECK-NEXT:    movq %rdi, %rsi
> +; CHECK-NEXT:    movq %rdi, %rax
>  ; CHECK-NEXT:    movl $6, %ebx
> -; CHECK-NEXT:    movl %ebx, %eax
> -; CHECK-NEXT:    jmpq *.LJTI0_0(,%rax,8)
> +; CHECK-NEXT:    movl %ebx, %ecx
> +; CHECK-NEXT:    jmpq *.LJTI0_0(,%rcx,8)
> +; CHECK-NEXT:  .LBB0_1: # %for.cond.cleanup
> +; CHECK-NEXT:    movl $530, %edi # imm = 0x212
> +; CHECK-NEXT:    movq %rax, %rsi
> +; CHECK-NEXT:    popq %rbx
> +; CHECK-NEXT:    .cfi_def_cfa_offset 8
> +; CHECK-NEXT:    jmp ccc@PLT # TAILCALL
> +; CHECK-NEXT:    .p2align 4, 0x90
>  ; CHECK-NEXT:  .LBB0_2: # %sw.bb1
> +; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
> +; CHECK-NEXT:    .cfi_def_cfa_offset 16
>  ; CHECK-NEXT:    movl $531, %edi # imm = 0x213
> -; CHECK-NEXT:  .LBB0_3: # %for.body
> -; CHECK-NEXT:    callq ccc@PLT
> -; CHECK-NEXT:  .LBB0_4: # %for.body
>  ; CHECK-NEXT:    movq %rax, %rsi
> +; CHECK-NEXT:    callq ccc@PLT
>  ; CHECK-NEXT:    decl %ebx
> -; CHECK-NEXT:    movl %ebx, %eax
> -; CHECK-NEXT:    jmpq *.LJTI0_0(,%rax,8)
> -; CHECK-NEXT:  .LBB0_5: # %sw.bb3
> +; CHECK-NEXT:    movl %ebx, %ecx
> +; CHECK-NEXT:    jmpq *.LJTI0_0(,%rcx,8)
> +; CHECK-NEXT:    .p2align 4, 0x90
> +; CHECK-NEXT:  .LBB0_3: # %sw.bb3
> +; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
>  ; CHECK-NEXT:    movl $532, %edi # imm = 0x214
> +; CHECK-NEXT:    movq %rax, %rsi
>  ; CHECK-NEXT:    callq bbb@PLT
> -; CHECK-NEXT:    jmp .LBB0_4
> -; CHECK-NEXT:  .LBB0_7: # %sw.bb5
> +; CHECK-NEXT:    decl %ebx
> +; CHECK-NEXT:    movl %ebx, %ecx
> +; CHECK-NEXT:    jmpq *.LJTI0_0(,%rcx,8)
> +; CHECK-NEXT:    .p2align 4, 0x90
> +; CHECK-NEXT:  .LBB0_4: # %sw.bb5
> +; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
>  ; CHECK-NEXT:    movl $533, %edi # imm = 0x215
> +; CHECK-NEXT:    movq %rax, %rsi
>  ; CHECK-NEXT:    callq bbb@PLT
> -; CHECK-NEXT:    jmp .LBB0_4
> -; CHECK-NEXT:  .LBB0_8: # %sw.bb7
> +; CHECK-NEXT:    decl %ebx
> +; CHECK-NEXT:    movl %ebx, %ecx
> +; CHECK-NEXT:    jmpq *.LJTI0_0(,%rcx,8)
> +; CHECK-NEXT:    .p2align 4, 0x90
> +; CHECK-NEXT:  .LBB0_5: # %sw.bb7
> +; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
>  ; CHECK-NEXT:    movl $535, %edi # imm = 0x217
> +; CHECK-NEXT:    movq %rax, %rsi
>  ; CHECK-NEXT:    callq bbb@PLT
> -; CHECK-NEXT:    jmp .LBB0_4
> -; CHECK-NEXT:  .LBB0_9: # %sw.bb9
> +; CHECK-NEXT:    decl %ebx
> +; CHECK-NEXT:    movl %ebx, %ecx
> +; CHECK-NEXT:    jmpq *.LJTI0_0(,%rcx,8)
> +; CHECK-NEXT:    .p2align 4, 0x90
> +; CHECK-NEXT:  .LBB0_6: # %sw.bb9
> +; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
>  ; CHECK-NEXT:    movl $536, %edi # imm = 0x218
> -; CHECK-NEXT:    jmp .LBB0_3
> -; CHECK-NEXT:  .LBB0_10: # %sw.bb11
> +; CHECK-NEXT:    movq %rax, %rsi
> +; CHECK-NEXT:    callq ccc@PLT
> +; CHECK-NEXT:    decl %ebx
> +; CHECK-NEXT:    movl %ebx, %ecx
> +; CHECK-NEXT:    jmpq *.LJTI0_0(,%rcx,8)
> +; CHECK-NEXT:    .p2align 4, 0x90
> +; CHECK-NEXT:  .LBB0_7: # %sw.bb11
> +; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
>  ; CHECK-NEXT:    movl $658, %edi # imm = 0x292
> +; CHECK-NEXT:    movq %rax, %rsi
>  ; CHECK-NEXT:    callq bbb@PLT
> -; CHECK-NEXT:    jmp .LBB0_4
> -; CHECK-NEXT:  .LBB0_11: # %for.cond.cleanup
> -; CHECK-NEXT:    movl $530, %edi # imm = 0x212
> -; CHECK-NEXT:    popq %rbx
> -; CHECK-NEXT:    .cfi_def_cfa_offset 8
> -; CHECK-NEXT:    jmp ccc@PLT # TAILCALL
> +; CHECK-NEXT:    decl %ebx
> +; CHECK-NEXT:    movl %ebx, %ecx
> +; CHECK-NEXT:    jmpq *.LJTI0_0(,%rcx,8)
>  entry:
>    br label %for.body
>
>
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits