[llvm] [TailDuplicator] Add a limit on the number of indirect branch successors (PR #78582)

Quentin Dian via llvm-commits llvm-commits at lists.llvm.org
Sat Feb 24 05:24:39 PST 2024


https://github.com/DianQK updated https://github.com/llvm/llvm-project/pull/78582

>From 948dfb97685109c346aedbdae5122cdd9db499d9 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Mon, 5 Feb 2024 20:58:47 +0800
Subject: [PATCH 1/4] Pre-commit test cases

---
 .../CodeGen/X86/tail-dup-pred-succ-size.mir   | 434 ++++++++++++++++++
 1 file changed, 434 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir

diff --git a/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir b/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir
new file mode 100644
index 00000000000000..fe5bf1831d09cc
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir
@@ -0,0 +1,434 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication %s -o - | FileCheck %s -check-prefix=LIMIT
+# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication %s -o - | FileCheck %s -check-prefix=NOLIMIT
+
+--- |
+  source_filename = "tail-dup-pred-succ-size.ll"
+  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+
+  define i32 @foo(ptr %0, i32 %1) {
+    %3 = lshr i32 %1, 1
+    %4 = and i32 %3, 7
+    switch i32 %4, label %default.unreachable2 [
+      i32 0, label %5
+      i32 1, label %7
+      i32 2, label %10
+      i32 3, label %13
+    ]
+
+  5:                                                ; preds = %2
+    %6 = load i32, ptr %0, align 4
+    br label %16
+
+  7:                                                ; preds = %2
+    %8 = load i32, ptr %0, align 4
+    %9 = lshr i32 %8, 1
+    br label %16
+
+  10:                                               ; preds = %2
+    %11 = load i32, ptr %0, align 4
+    %12 = lshr i32 %11, 2
+    br label %16
+
+  13:                                               ; preds = %2
+    %14 = load i32, ptr %0, align 4
+    %15 = lshr i32 %14, 3
+    br label %16
+
+  default.unreachable2:                             ; preds = %16, %2
+    unreachable
+
+  16:                                               ; preds = %13, %10, %7, %5
+    %17 = phi i32 [ %15, %13 ], [ %12, %10 ], [ %9, %7 ], [ %6, %5 ]
+    %18 = lshr i32 %1, 2
+    %19 = and i32 %18, 7
+    switch i32 %19, label %default.unreachable2 [
+      i32 0, label %20
+      i32 1, label %22
+      i32 2, label %25
+      i32 3, label %28
+    ]
+
+  20:                                               ; preds = %16
+    %21 = load i32, ptr %0, align 4
+    br label %31
+
+  22:                                               ; preds = %16
+    %23 = load i32, ptr %0, align 4
+    %24 = lshr i32 %23, 1
+    br label %31
+
+  25:                                               ; preds = %16
+    %26 = load i32, ptr %0, align 4
+    %27 = lshr i32 %26, 2
+    br label %31
+
+  28:                                               ; preds = %16
+    %29 = load i32, ptr %0, align 4
+    %30 = lshr i32 %29, 6
+    br label %31
+
+  31:                                               ; preds = %28, %25, %22, %20
+    %32 = phi i32 [ %30, %28 ], [ %27, %25 ], [ %24, %22 ], [ %21, %20 ]
+    %33 = or i32 %32, %17
+    ret i32 %33
+  }
+
+...
+---
+name:            foo
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHCatchret:   false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   true
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: gr32, preferred-register: '' }
+  - { id: 1, class: gr32, preferred-register: '' }
+  - { id: 2, class: gr32, preferred-register: '' }
+  - { id: 3, class: gr32, preferred-register: '' }
+  - { id: 4, class: gr32, preferred-register: '' }
+  - { id: 5, class: gr32, preferred-register: '' }
+  - { id: 6, class: gr32, preferred-register: '' }
+  - { id: 7, class: gr32, preferred-register: '' }
+  - { id: 8, class: gr32, preferred-register: '' }
+  - { id: 9, class: gr32, preferred-register: '' }
+  - { id: 10, class: gr64, preferred-register: '' }
+  - { id: 11, class: gr32, preferred-register: '' }
+  - { id: 12, class: gr64_nosp, preferred-register: '' }
+  - { id: 13, class: gr32, preferred-register: '' }
+  - { id: 14, class: gr32, preferred-register: '' }
+  - { id: 15, class: gr32, preferred-register: '' }
+  - { id: 16, class: gr32, preferred-register: '' }
+  - { id: 17, class: gr32, preferred-register: '' }
+  - { id: 18, class: gr64_nosp, preferred-register: '' }
+  - { id: 19, class: gr32, preferred-register: '' }
+  - { id: 20, class: gr32, preferred-register: '' }
+  - { id: 21, class: gr32, preferred-register: '' }
+  - { id: 22, class: gr32, preferred-register: '' }
+  - { id: 23, class: gr32, preferred-register: '' }
+  - { id: 24, class: gr32, preferred-register: '' }
+liveins:
+  - { reg: '$rdi', virtual-reg: '%10' }
+  - { reg: '$esi', virtual-reg: '%11' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+entry_values:    []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+jumpTable:
+  kind:            block-address
+  entries:
+    - id:              0
+      blocks:          [ '%bb.1', '%bb.2', '%bb.3', '%bb.4' ]
+    - id:              1
+      blocks:          [ '%bb.7', '%bb.8', '%bb.9', '%bb.10' ]
+body:             |
+  ; LIMIT-LABEL: name: foo
+  ; LIMIT: bb.0 (%ir-block.2):
+  ; LIMIT-NEXT:   successors: %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000), %bb.5(0x20000000)
+  ; LIMIT-NEXT:   liveins: $rdi, $esi
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT:   [[COPY:%[0-9]+]]:gr32 = COPY $esi
+  ; LIMIT-NEXT:   [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
+  ; LIMIT-NEXT:   [[SHR32ri:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 1, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[SHR32ri]], 7, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[SUBREG_TO_REG:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri]], %subreg.sub_32bit
+  ; LIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg :: (load (s64) from jump-table)
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT: bb.2 (%ir-block.5):
+  ; LIMIT-NEXT:   successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT:   [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; LIMIT-NEXT:   [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri1]], 7, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri1]], %subreg.sub_32bit
+  ; LIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg :: (load (s64) from jump-table)
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT: bb.3 (%ir-block.7):
+  ; LIMIT-NEXT:   successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT:   [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; LIMIT-NEXT:   [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[AND32ri2:%[0-9]+]]:gr32 = AND32ri [[SHR32ri3]], 7, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[SUBREG_TO_REG2:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri2]], %subreg.sub_32bit
+  ; LIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG2]], %jump-table.1, $noreg :: (load (s64) from jump-table)
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT: bb.4 (%ir-block.10):
+  ; LIMIT-NEXT:   successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT:   [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; LIMIT-NEXT:   [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[AND32ri3:%[0-9]+]]:gr32 = AND32ri [[SHR32ri5]], 7, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[SUBREG_TO_REG3:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri3]], %subreg.sub_32bit
+  ; LIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG3]], %jump-table.1, $noreg :: (load (s64) from jump-table)
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT: bb.5 (%ir-block.13):
+  ; LIMIT-NEXT:   successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT:   [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; LIMIT-NEXT:   [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[AND32ri4:%[0-9]+]]:gr32 = AND32ri [[SHR32ri7]], 7, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[SUBREG_TO_REG4:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri4]], %subreg.sub_32bit
+  ; LIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG4]], %jump-table.1, $noreg :: (load (s64) from jump-table)
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT: bb.6.default.unreachable2:
+  ; LIMIT-NEXT:   successors:
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT: bb.9 (%ir-block.20):
+  ; LIMIT-NEXT:   successors: %bb.13(0x80000000)
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT:   [[PHI:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
+  ; LIMIT-NEXT:   [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; LIMIT-NEXT:   JMP_1 %bb.13
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT: bb.10 (%ir-block.22):
+  ; LIMIT-NEXT:   successors: %bb.13(0x80000000)
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT:   [[PHI1:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
+  ; LIMIT-NEXT:   [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; LIMIT-NEXT:   [[SHR32ri8:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags
+  ; LIMIT-NEXT:   JMP_1 %bb.13
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT: bb.11 (%ir-block.25):
+  ; LIMIT-NEXT:   successors: %bb.13(0x80000000)
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT:   [[PHI2:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
+  ; LIMIT-NEXT:   [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; LIMIT-NEXT:   [[SHR32ri9:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags
+  ; LIMIT-NEXT:   JMP_1 %bb.13
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT: bb.12 (%ir-block.28):
+  ; LIMIT-NEXT:   successors: %bb.13(0x80000000)
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT:   [[PHI3:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
+  ; LIMIT-NEXT:   [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; LIMIT-NEXT:   [[SHR32ri10:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT: bb.13 (%ir-block.31):
+  ; LIMIT-NEXT:   [[PHI4:%[0-9]+]]:gr32 = PHI [[PHI]], %bb.9, [[PHI1]], %bb.10, [[PHI2]], %bb.11, [[PHI3]], %bb.12
+  ; LIMIT-NEXT:   [[PHI5:%[0-9]+]]:gr32 = PHI [[SHR32ri10]], %bb.12, [[SHR32ri9]], %bb.11, [[SHR32ri8]], %bb.10, [[MOV32rm4]], %bb.9
+  ; LIMIT-NEXT:   [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[PHI5]], [[PHI4]], implicit-def dead $eflags
+  ; LIMIT-NEXT:   $eax = COPY [[OR32rr]]
+  ; LIMIT-NEXT:   RET 0, $eax
+  ;
+  ; NOLIMIT-LABEL: name: foo
+  ; NOLIMIT: bb.0 (%ir-block.2):
+  ; NOLIMIT-NEXT:   successors: %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000), %bb.5(0x20000000)
+  ; NOLIMIT-NEXT:   liveins: $rdi, $esi
+  ; NOLIMIT-NEXT: {{  $}}
+  ; NOLIMIT-NEXT:   [[COPY:%[0-9]+]]:gr32 = COPY $esi
+  ; NOLIMIT-NEXT:   [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
+  ; NOLIMIT-NEXT:   [[SHR32ri:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 1, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[SHR32ri]], 7, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[SUBREG_TO_REG:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri]], %subreg.sub_32bit
+  ; NOLIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg :: (load (s64) from jump-table)
+  ; NOLIMIT-NEXT: {{  $}}
+  ; NOLIMIT-NEXT: bb.2 (%ir-block.5):
+  ; NOLIMIT-NEXT:   successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
+  ; NOLIMIT-NEXT: {{  $}}
+  ; NOLIMIT-NEXT:   [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; NOLIMIT-NEXT:   [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri1]], 7, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri1]], %subreg.sub_32bit
+  ; NOLIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg :: (load (s64) from jump-table)
+  ; NOLIMIT-NEXT: {{  $}}
+  ; NOLIMIT-NEXT: bb.3 (%ir-block.7):
+  ; NOLIMIT-NEXT:   successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
+  ; NOLIMIT-NEXT: {{  $}}
+  ; NOLIMIT-NEXT:   [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; NOLIMIT-NEXT:   [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[AND32ri2:%[0-9]+]]:gr32 = AND32ri [[SHR32ri3]], 7, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[SUBREG_TO_REG2:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri2]], %subreg.sub_32bit
+  ; NOLIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG2]], %jump-table.1, $noreg :: (load (s64) from jump-table)
+  ; NOLIMIT-NEXT: {{  $}}
+  ; NOLIMIT-NEXT: bb.4 (%ir-block.10):
+  ; NOLIMIT-NEXT:   successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
+  ; NOLIMIT-NEXT: {{  $}}
+  ; NOLIMIT-NEXT:   [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; NOLIMIT-NEXT:   [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[AND32ri3:%[0-9]+]]:gr32 = AND32ri [[SHR32ri5]], 7, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[SUBREG_TO_REG3:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri3]], %subreg.sub_32bit
+  ; NOLIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG3]], %jump-table.1, $noreg :: (load (s64) from jump-table)
+  ; NOLIMIT-NEXT: {{  $}}
+  ; NOLIMIT-NEXT: bb.5 (%ir-block.13):
+  ; NOLIMIT-NEXT:   successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
+  ; NOLIMIT-NEXT: {{  $}}
+  ; NOLIMIT-NEXT:   [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; NOLIMIT-NEXT:   [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[AND32ri4:%[0-9]+]]:gr32 = AND32ri [[SHR32ri7]], 7, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   [[SUBREG_TO_REG4:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri4]], %subreg.sub_32bit
+  ; NOLIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG4]], %jump-table.1, $noreg :: (load (s64) from jump-table)
+  ; NOLIMIT-NEXT: {{  $}}
+  ; NOLIMIT-NEXT: bb.6.default.unreachable2:
+  ; NOLIMIT-NEXT:   successors:
+  ; NOLIMIT-NEXT: {{  $}}
+  ; NOLIMIT-NEXT: bb.9 (%ir-block.20):
+  ; NOLIMIT-NEXT:   successors: %bb.13(0x80000000)
+  ; NOLIMIT-NEXT: {{  $}}
+  ; NOLIMIT-NEXT:   [[PHI:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
+  ; NOLIMIT-NEXT:   [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; NOLIMIT-NEXT:   JMP_1 %bb.13
+  ; NOLIMIT-NEXT: {{  $}}
+  ; NOLIMIT-NEXT: bb.10 (%ir-block.22):
+  ; NOLIMIT-NEXT:   successors: %bb.13(0x80000000)
+  ; NOLIMIT-NEXT: {{  $}}
+  ; NOLIMIT-NEXT:   [[PHI1:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
+  ; NOLIMIT-NEXT:   [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; NOLIMIT-NEXT:   [[SHR32ri8:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   JMP_1 %bb.13
+  ; NOLIMIT-NEXT: {{  $}}
+  ; NOLIMIT-NEXT: bb.11 (%ir-block.25):
+  ; NOLIMIT-NEXT:   successors: %bb.13(0x80000000)
+  ; NOLIMIT-NEXT: {{  $}}
+  ; NOLIMIT-NEXT:   [[PHI2:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
+  ; NOLIMIT-NEXT:   [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; NOLIMIT-NEXT:   [[SHR32ri9:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   JMP_1 %bb.13
+  ; NOLIMIT-NEXT: {{  $}}
+  ; NOLIMIT-NEXT: bb.12 (%ir-block.28):
+  ; NOLIMIT-NEXT:   successors: %bb.13(0x80000000)
+  ; NOLIMIT-NEXT: {{  $}}
+  ; NOLIMIT-NEXT:   [[PHI3:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
+  ; NOLIMIT-NEXT:   [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; NOLIMIT-NEXT:   [[SHR32ri10:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags
+  ; NOLIMIT-NEXT: {{  $}}
+  ; NOLIMIT-NEXT: bb.13 (%ir-block.31):
+  ; NOLIMIT-NEXT:   [[PHI4:%[0-9]+]]:gr32 = PHI [[PHI]], %bb.9, [[PHI1]], %bb.10, [[PHI2]], %bb.11, [[PHI3]], %bb.12
+  ; NOLIMIT-NEXT:   [[PHI5:%[0-9]+]]:gr32 = PHI [[SHR32ri10]], %bb.12, [[SHR32ri9]], %bb.11, [[SHR32ri8]], %bb.10, [[MOV32rm4]], %bb.9
+  ; NOLIMIT-NEXT:   [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[PHI5]], [[PHI4]], implicit-def dead $eflags
+  ; NOLIMIT-NEXT:   $eax = COPY [[OR32rr]]
+  ; NOLIMIT-NEXT:   RET 0, $eax
+  bb.0 (%ir-block.2):
+    successors: %bb.12(0x80000000)
+    liveins: $rdi, $esi
+
+    %11:gr32 = COPY $esi
+    %10:gr64 = COPY $rdi
+    %13:gr32 = SHR32ri %11, 1, implicit-def dead $eflags
+    %14:gr32 = AND32ri %13, 7, implicit-def dead $eflags
+    %12:gr64_nosp = SUBREG_TO_REG 0, killed %14, %subreg.sub_32bit
+
+  bb.12 (%ir-block.2):
+    successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
+
+    JMP64m $noreg, 8, %12, %jump-table.0, $noreg :: (load (s64) from jump-table)
+
+  bb.1 (%ir-block.5):
+    successors: %bb.6(0x80000000)
+
+    %0:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+    JMP_1 %bb.6
+
+  bb.2 (%ir-block.7):
+    successors: %bb.6(0x80000000)
+
+    %17:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+    %1:gr32 = SHR32ri %17, 1, implicit-def dead $eflags
+    JMP_1 %bb.6
+
+  bb.3 (%ir-block.10):
+    successors: %bb.6(0x80000000)
+
+    %16:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+    %2:gr32 = SHR32ri %16, 2, implicit-def dead $eflags
+    JMP_1 %bb.6
+
+  bb.4 (%ir-block.13):
+    successors: %bb.6(0x80000000)
+
+    %15:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+    %3:gr32 = SHR32ri %15, 3, implicit-def dead $eflags
+    JMP_1 %bb.6
+
+  bb.5.default.unreachable2:
+    successors:
+
+
+  bb.6 (%ir-block.16):
+    successors: %bb.13(0x80000000)
+
+    %4:gr32 = PHI %3, %bb.4, %2, %bb.3, %1, %bb.2, %0, %bb.1
+    %19:gr32 = SHR32ri %11, 2, implicit-def dead $eflags
+    %20:gr32 = AND32ri %19, 7, implicit-def dead $eflags
+    %18:gr64_nosp = SUBREG_TO_REG 0, killed %20, %subreg.sub_32bit
+
+  bb.13 (%ir-block.16):
+    successors: %bb.7(0x20000000), %bb.8(0x20000000), %bb.9(0x20000000), %bb.10(0x20000000)
+
+    JMP64m $noreg, 8, %18, %jump-table.1, $noreg :: (load (s64) from jump-table)
+
+  bb.7 (%ir-block.20):
+    successors: %bb.11(0x80000000)
+
+    %5:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+    JMP_1 %bb.11
+
+  bb.8 (%ir-block.22):
+    successors: %bb.11(0x80000000)
+
+    %23:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+    %6:gr32 = SHR32ri %23, 1, implicit-def dead $eflags
+    JMP_1 %bb.11
+
+  bb.9 (%ir-block.25):
+    successors: %bb.11(0x80000000)
+
+    %22:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+    %7:gr32 = SHR32ri %22, 2, implicit-def dead $eflags
+    JMP_1 %bb.11
+
+  bb.10 (%ir-block.28):
+    successors: %bb.11(0x80000000)
+
+    %21:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+    %8:gr32 = SHR32ri %21, 6, implicit-def dead $eflags
+
+  bb.11 (%ir-block.31):
+    %9:gr32 = PHI %8, %bb.10, %7, %bb.9, %6, %bb.8, %5, %bb.7
+    %24:gr32 = OR32rr %9, %4, implicit-def dead $eflags
+    $eax = COPY %24
+    RET 0, $eax
+
+...

>From d155f418bc760dfbd71c65fb2981afc9884b2ba0 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Mon, 5 Feb 2024 21:16:58 +0800
Subject: [PATCH 2/4] [TailDuplicator] Add maximum predecessors and successors
 to consider tail duplicating blocks

---
 llvm/lib/CodeGen/TailDuplicator.cpp           | 16 +++++
 .../CodeGen/X86/tail-dup-pred-succ-size.mir   | 62 ++++++++-----------
 2 files changed, 43 insertions(+), 35 deletions(-)

diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index 5ed67bd0a121ed..c1e32fa7d63692 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -68,6 +68,18 @@ static cl::opt<unsigned> TailDupIndirectBranchSize(
              "end with indirect branches."), cl::init(20),
     cl::Hidden);
 
+static cl::opt<unsigned>
+    TailDupPredSize("tail-dup-pred-size",
+                    cl::desc("Maximum predecessors (maximum successors at the "
+                             "same time) to consider tail duplicating blocks."),
+                    cl::init(16), cl::Hidden);
+
+static cl::opt<unsigned>
+    TailDupSuccSize("tail-dup-succ-size",
+                    cl::desc("Maximum successors (maximum predecessors at the "
+                             "same time) to consider tail duplicating blocks."),
+                    cl::init(16), cl::Hidden);
+
 static cl::opt<bool>
     TailDupVerify("tail-dup-verify",
                   cl::desc("Verify sanity of PHI instructions during taildup"),
@@ -565,6 +577,10 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
   if (TailBB.isSuccessor(&TailBB))
     return false;
 
+  if (TailBB.pred_size() > TailDupPredSize &&
+      TailBB.succ_size() > TailDupSuccSize)
+    return false;
+
   // Set the limit on the cost to duplicate. When optimizing for size,
   // duplicate only one, because one branch instruction can be eliminated to
   // compensate for the duplication.
diff --git a/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir b/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir
index fe5bf1831d09cc..10448e33bd8ba2 100644
--- a/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir
+++ b/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir
@@ -1,6 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
-# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication %s -o - | FileCheck %s -check-prefix=LIMIT
-# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication %s -o - | FileCheck %s -check-prefix=NOLIMIT
+# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication -tail-dup-pred-size=3 -tail-dup-succ-size=3 %s -o - | FileCheck %s -check-prefix=LIMIT
+# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication -tail-dup-pred-size=4 -tail-dup-succ-size=4 %s -o - | FileCheck %s -check-prefix=NOLIMIT
 
 --- |
   source_filename = "tail-dup-pred-succ-size.ll"
@@ -172,81 +172,73 @@ body:             |
   ; LIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg :: (load (s64) from jump-table)
   ; LIMIT-NEXT: {{  $}}
   ; LIMIT-NEXT: bb.2 (%ir-block.5):
-  ; LIMIT-NEXT:   successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
+  ; LIMIT-NEXT:   successors: %bb.7(0x80000000)
   ; LIMIT-NEXT: {{  $}}
   ; LIMIT-NEXT:   [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
-  ; LIMIT-NEXT:   [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
-  ; LIMIT-NEXT:   [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri1]], 7, implicit-def dead $eflags
-  ; LIMIT-NEXT:   [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri1]], %subreg.sub_32bit
-  ; LIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg :: (load (s64) from jump-table)
+  ; LIMIT-NEXT:   JMP_1 %bb.7
   ; LIMIT-NEXT: {{  $}}
   ; LIMIT-NEXT: bb.3 (%ir-block.7):
-  ; LIMIT-NEXT:   successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
+  ; LIMIT-NEXT:   successors: %bb.7(0x80000000)
   ; LIMIT-NEXT: {{  $}}
   ; LIMIT-NEXT:   [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
-  ; LIMIT-NEXT:   [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags
-  ; LIMIT-NEXT:   [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
-  ; LIMIT-NEXT:   [[AND32ri2:%[0-9]+]]:gr32 = AND32ri [[SHR32ri3]], 7, implicit-def dead $eflags
-  ; LIMIT-NEXT:   [[SUBREG_TO_REG2:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri2]], %subreg.sub_32bit
-  ; LIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG2]], %jump-table.1, $noreg :: (load (s64) from jump-table)
+  ; LIMIT-NEXT:   [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags
+  ; LIMIT-NEXT:   JMP_1 %bb.7
   ; LIMIT-NEXT: {{  $}}
   ; LIMIT-NEXT: bb.4 (%ir-block.10):
-  ; LIMIT-NEXT:   successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
+  ; LIMIT-NEXT:   successors: %bb.7(0x80000000)
   ; LIMIT-NEXT: {{  $}}
   ; LIMIT-NEXT:   [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
-  ; LIMIT-NEXT:   [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags
-  ; LIMIT-NEXT:   [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
-  ; LIMIT-NEXT:   [[AND32ri3:%[0-9]+]]:gr32 = AND32ri [[SHR32ri5]], 7, implicit-def dead $eflags
-  ; LIMIT-NEXT:   [[SUBREG_TO_REG3:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri3]], %subreg.sub_32bit
-  ; LIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG3]], %jump-table.1, $noreg :: (load (s64) from jump-table)
+  ; LIMIT-NEXT:   [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags
+  ; LIMIT-NEXT:   JMP_1 %bb.7
   ; LIMIT-NEXT: {{  $}}
   ; LIMIT-NEXT: bb.5 (%ir-block.13):
-  ; LIMIT-NEXT:   successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
+  ; LIMIT-NEXT:   successors: %bb.7(0x80000000)
   ; LIMIT-NEXT: {{  $}}
   ; LIMIT-NEXT:   [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
-  ; LIMIT-NEXT:   [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags
-  ; LIMIT-NEXT:   [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
-  ; LIMIT-NEXT:   [[AND32ri4:%[0-9]+]]:gr32 = AND32ri [[SHR32ri7]], 7, implicit-def dead $eflags
-  ; LIMIT-NEXT:   [[SUBREG_TO_REG4:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri4]], %subreg.sub_32bit
-  ; LIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG4]], %jump-table.1, $noreg :: (load (s64) from jump-table)
+  ; LIMIT-NEXT:   [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags
+  ; LIMIT-NEXT:   JMP_1 %bb.7
   ; LIMIT-NEXT: {{  $}}
   ; LIMIT-NEXT: bb.6.default.unreachable2:
   ; LIMIT-NEXT:   successors:
   ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT: bb.7 (%ir-block.16):
+  ; LIMIT-NEXT:   successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
+  ; LIMIT-NEXT: {{  $}}
+  ; LIMIT-NEXT:   [[PHI:%[0-9]+]]:gr32 = PHI [[SHR32ri3]], %bb.5, [[SHR32ri2]], %bb.4, [[SHR32ri1]], %bb.3, [[MOV32rm]], %bb.2
+  ; LIMIT-NEXT:   [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri4]], 7, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri1]], %subreg.sub_32bit
+  ; LIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg :: (load (s64) from jump-table)
+  ; LIMIT-NEXT: {{  $}}
   ; LIMIT-NEXT: bb.9 (%ir-block.20):
   ; LIMIT-NEXT:   successors: %bb.13(0x80000000)
   ; LIMIT-NEXT: {{  $}}
-  ; LIMIT-NEXT:   [[PHI:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
   ; LIMIT-NEXT:   [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
   ; LIMIT-NEXT:   JMP_1 %bb.13
   ; LIMIT-NEXT: {{  $}}
   ; LIMIT-NEXT: bb.10 (%ir-block.22):
   ; LIMIT-NEXT:   successors: %bb.13(0x80000000)
   ; LIMIT-NEXT: {{  $}}
-  ; LIMIT-NEXT:   [[PHI1:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
   ; LIMIT-NEXT:   [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
-  ; LIMIT-NEXT:   [[SHR32ri8:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags
   ; LIMIT-NEXT:   JMP_1 %bb.13
   ; LIMIT-NEXT: {{  $}}
   ; LIMIT-NEXT: bb.11 (%ir-block.25):
   ; LIMIT-NEXT:   successors: %bb.13(0x80000000)
   ; LIMIT-NEXT: {{  $}}
-  ; LIMIT-NEXT:   [[PHI2:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
   ; LIMIT-NEXT:   [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
-  ; LIMIT-NEXT:   [[SHR32ri9:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags
   ; LIMIT-NEXT:   JMP_1 %bb.13
   ; LIMIT-NEXT: {{  $}}
   ; LIMIT-NEXT: bb.12 (%ir-block.28):
   ; LIMIT-NEXT:   successors: %bb.13(0x80000000)
   ; LIMIT-NEXT: {{  $}}
-  ; LIMIT-NEXT:   [[PHI3:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
   ; LIMIT-NEXT:   [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
-  ; LIMIT-NEXT:   [[SHR32ri10:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags
   ; LIMIT-NEXT: {{  $}}
   ; LIMIT-NEXT: bb.13 (%ir-block.31):
-  ; LIMIT-NEXT:   [[PHI4:%[0-9]+]]:gr32 = PHI [[PHI]], %bb.9, [[PHI1]], %bb.10, [[PHI2]], %bb.11, [[PHI3]], %bb.12
-  ; LIMIT-NEXT:   [[PHI5:%[0-9]+]]:gr32 = PHI [[SHR32ri10]], %bb.12, [[SHR32ri9]], %bb.11, [[SHR32ri8]], %bb.10, [[MOV32rm4]], %bb.9
-  ; LIMIT-NEXT:   [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[PHI5]], [[PHI4]], implicit-def dead $eflags
+  ; LIMIT-NEXT:   [[PHI1:%[0-9]+]]:gr32 = PHI [[SHR32ri7]], %bb.12, [[SHR32ri6]], %bb.11, [[SHR32ri5]], %bb.10, [[MOV32rm4]], %bb.9
+  ; LIMIT-NEXT:   [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[PHI1]], [[PHI]], implicit-def dead $eflags
   ; LIMIT-NEXT:   $eax = COPY [[OR32rr]]
   ; LIMIT-NEXT:   RET 0, $eax
   ;

>From fea16d0e108a865f3cbf429a5841012fdff4a1e0 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sat, 24 Feb 2024 20:39:42 +0800
Subject: [PATCH 3/4] Simplify MIR

---
 .../CodeGen/X86/tail-dup-pred-succ-size.mir   | 332 +++++-------------
 1 file changed, 83 insertions(+), 249 deletions(-)

diff --git a/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir b/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir
index 10448e33bd8ba2..67f8cc72e0d726 100644
--- a/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir
+++ b/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir
@@ -2,165 +2,19 @@
 # RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication -tail-dup-pred-size=3 -tail-dup-succ-size=3 %s -o - | FileCheck %s -check-prefix=LIMIT
 # RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication -tail-dup-pred-size=4 -tail-dup-succ-size=4 %s -o - | FileCheck %s -check-prefix=NOLIMIT
 
---- |
-  source_filename = "tail-dup-pred-succ-size.ll"
-  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
-
-  define i32 @foo(ptr %0, i32 %1) {
-    %3 = lshr i32 %1, 1
-    %4 = and i32 %3, 7
-    switch i32 %4, label %default.unreachable2 [
-      i32 0, label %5
-      i32 1, label %7
-      i32 2, label %10
-      i32 3, label %13
-    ]
-
-  5:                                                ; preds = %2
-    %6 = load i32, ptr %0, align 4
-    br label %16
-
-  7:                                                ; preds = %2
-    %8 = load i32, ptr %0, align 4
-    %9 = lshr i32 %8, 1
-    br label %16
-
-  10:                                               ; preds = %2
-    %11 = load i32, ptr %0, align 4
-    %12 = lshr i32 %11, 2
-    br label %16
-
-  13:                                               ; preds = %2
-    %14 = load i32, ptr %0, align 4
-    %15 = lshr i32 %14, 3
-    br label %16
-
-  default.unreachable2:                             ; preds = %16, %2
-    unreachable
-
-  16:                                               ; preds = %13, %10, %7, %5
-    %17 = phi i32 [ %15, %13 ], [ %12, %10 ], [ %9, %7 ], [ %6, %5 ]
-    %18 = lshr i32 %1, 2
-    %19 = and i32 %18, 7
-    switch i32 %19, label %default.unreachable2 [
-      i32 0, label %20
-      i32 1, label %22
-      i32 2, label %25
-      i32 3, label %28
-    ]
-
-  20:                                               ; preds = %16
-    %21 = load i32, ptr %0, align 4
-    br label %31
-
-  22:                                               ; preds = %16
-    %23 = load i32, ptr %0, align 4
-    %24 = lshr i32 %23, 1
-    br label %31
-
-  25:                                               ; preds = %16
-    %26 = load i32, ptr %0, align 4
-    %27 = lshr i32 %26, 2
-    br label %31
-
-  28:                                               ; preds = %16
-    %29 = load i32, ptr %0, align 4
-    %30 = lshr i32 %29, 6
-    br label %31
-
-  31:                                               ; preds = %28, %25, %22, %20
-    %32 = phi i32 [ %30, %28 ], [ %27, %25 ], [ %24, %22 ], [ %21, %20 ]
-    %33 = or i32 %32, %17
-    ret i32 %33
-  }
-
-...
 ---
 name:            foo
-alignment:       16
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-failedISel:      false
 tracksRegLiveness: true
-hasWinCFI:       false
-callsEHReturn:   false
-callsUnwindInit: false
-hasEHCatchret:   false
-hasEHScopes:     false
-hasEHFunclets:   false
-isOutlined:      false
-debugInstrRef:   true
-failsVerification: false
-tracksDebugUserValues: false
-registers:
-  - { id: 0, class: gr32, preferred-register: '' }
-  - { id: 1, class: gr32, preferred-register: '' }
-  - { id: 2, class: gr32, preferred-register: '' }
-  - { id: 3, class: gr32, preferred-register: '' }
-  - { id: 4, class: gr32, preferred-register: '' }
-  - { id: 5, class: gr32, preferred-register: '' }
-  - { id: 6, class: gr32, preferred-register: '' }
-  - { id: 7, class: gr32, preferred-register: '' }
-  - { id: 8, class: gr32, preferred-register: '' }
-  - { id: 9, class: gr32, preferred-register: '' }
-  - { id: 10, class: gr64, preferred-register: '' }
-  - { id: 11, class: gr32, preferred-register: '' }
-  - { id: 12, class: gr64_nosp, preferred-register: '' }
-  - { id: 13, class: gr32, preferred-register: '' }
-  - { id: 14, class: gr32, preferred-register: '' }
-  - { id: 15, class: gr32, preferred-register: '' }
-  - { id: 16, class: gr32, preferred-register: '' }
-  - { id: 17, class: gr32, preferred-register: '' }
-  - { id: 18, class: gr64_nosp, preferred-register: '' }
-  - { id: 19, class: gr32, preferred-register: '' }
-  - { id: 20, class: gr32, preferred-register: '' }
-  - { id: 21, class: gr32, preferred-register: '' }
-  - { id: 22, class: gr32, preferred-register: '' }
-  - { id: 23, class: gr32, preferred-register: '' }
-  - { id: 24, class: gr32, preferred-register: '' }
-liveins:
-  - { reg: '$rdi', virtual-reg: '%10' }
-  - { reg: '$esi', virtual-reg: '%11' }
-frameInfo:
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    1
-  adjustsStack:    false
-  hasCalls:        false
-  stackProtector:  ''
-  functionContext: ''
-  maxCallFrameSize: 4294967295
-  cvBytesOfCalleeSavedRegisters: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-  hasTailCall:     false
-  localFrameSize:  0
-  savePoint:       ''
-  restorePoint:    ''
-fixedStack:      []
-stack:           []
-entry_values:    []
-callSites:       []
-debugValueSubstitutions: []
-constants:       []
-machineFunctionInfo: {}
 jumpTable:
   kind:            block-address
   entries:
     - id:              0
-      blocks:          [ '%bb.1', '%bb.2', '%bb.3', '%bb.4' ]
+      blocks:          [ '%bb.2', '%bb.3', '%bb.4', '%bb.5' ]
     - id:              1
-      blocks:          [ '%bb.7', '%bb.8', '%bb.9', '%bb.10' ]
+      blocks:          [ '%bb.9', '%bb.10', '%bb.11', '%bb.12' ]
 body:             |
   ; LIMIT-LABEL: name: foo
-  ; LIMIT: bb.0 (%ir-block.2):
+  ; LIMIT: bb.0:
   ; LIMIT-NEXT:   successors: %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000), %bb.5(0x20000000)
   ; LIMIT-NEXT:   liveins: $rdi, $esi
   ; LIMIT-NEXT: {{  $}}
@@ -169,81 +23,81 @@ body:             |
   ; LIMIT-NEXT:   [[SHR32ri:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 1, implicit-def dead $eflags
   ; LIMIT-NEXT:   [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[SHR32ri]], 7, implicit-def dead $eflags
   ; LIMIT-NEXT:   [[SUBREG_TO_REG:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri]], %subreg.sub_32bit
-  ; LIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg :: (load (s64) from jump-table)
+  ; LIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg
   ; LIMIT-NEXT: {{  $}}
-  ; LIMIT-NEXT: bb.2 (%ir-block.5):
+  ; LIMIT-NEXT: bb.2:
   ; LIMIT-NEXT:   successors: %bb.7(0x80000000)
   ; LIMIT-NEXT: {{  $}}
-  ; LIMIT-NEXT:   [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; LIMIT-NEXT:   [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
   ; LIMIT-NEXT:   JMP_1 %bb.7
   ; LIMIT-NEXT: {{  $}}
-  ; LIMIT-NEXT: bb.3 (%ir-block.7):
+  ; LIMIT-NEXT: bb.3:
   ; LIMIT-NEXT:   successors: %bb.7(0x80000000)
   ; LIMIT-NEXT: {{  $}}
-  ; LIMIT-NEXT:   [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; LIMIT-NEXT:   [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
   ; LIMIT-NEXT:   [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags
   ; LIMIT-NEXT:   JMP_1 %bb.7
   ; LIMIT-NEXT: {{  $}}
-  ; LIMIT-NEXT: bb.4 (%ir-block.10):
+  ; LIMIT-NEXT: bb.4:
   ; LIMIT-NEXT:   successors: %bb.7(0x80000000)
   ; LIMIT-NEXT: {{  $}}
-  ; LIMIT-NEXT:   [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; LIMIT-NEXT:   [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
   ; LIMIT-NEXT:   [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags
   ; LIMIT-NEXT:   JMP_1 %bb.7
   ; LIMIT-NEXT: {{  $}}
-  ; LIMIT-NEXT: bb.5 (%ir-block.13):
+  ; LIMIT-NEXT: bb.5:
   ; LIMIT-NEXT:   successors: %bb.7(0x80000000)
   ; LIMIT-NEXT: {{  $}}
-  ; LIMIT-NEXT:   [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; LIMIT-NEXT:   [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
   ; LIMIT-NEXT:   [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags
   ; LIMIT-NEXT:   JMP_1 %bb.7
   ; LIMIT-NEXT: {{  $}}
-  ; LIMIT-NEXT: bb.6.default.unreachable2:
+  ; LIMIT-NEXT: bb.6:
   ; LIMIT-NEXT:   successors:
   ; LIMIT-NEXT: {{  $}}
-  ; LIMIT-NEXT: bb.7 (%ir-block.16):
+  ; LIMIT-NEXT: bb.7:
   ; LIMIT-NEXT:   successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
   ; LIMIT-NEXT: {{  $}}
   ; LIMIT-NEXT:   [[PHI:%[0-9]+]]:gr32 = PHI [[SHR32ri3]], %bb.5, [[SHR32ri2]], %bb.4, [[SHR32ri1]], %bb.3, [[MOV32rm]], %bb.2
   ; LIMIT-NEXT:   [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
   ; LIMIT-NEXT:   [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri4]], 7, implicit-def dead $eflags
   ; LIMIT-NEXT:   [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri1]], %subreg.sub_32bit
-  ; LIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg :: (load (s64) from jump-table)
+  ; LIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg
   ; LIMIT-NEXT: {{  $}}
-  ; LIMIT-NEXT: bb.9 (%ir-block.20):
+  ; LIMIT-NEXT: bb.9:
   ; LIMIT-NEXT:   successors: %bb.13(0x80000000)
   ; LIMIT-NEXT: {{  $}}
-  ; LIMIT-NEXT:   [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; LIMIT-NEXT:   [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
   ; LIMIT-NEXT:   JMP_1 %bb.13
   ; LIMIT-NEXT: {{  $}}
-  ; LIMIT-NEXT: bb.10 (%ir-block.22):
+  ; LIMIT-NEXT: bb.10:
   ; LIMIT-NEXT:   successors: %bb.13(0x80000000)
   ; LIMIT-NEXT: {{  $}}
-  ; LIMIT-NEXT:   [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; LIMIT-NEXT:   [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
   ; LIMIT-NEXT:   [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags
   ; LIMIT-NEXT:   JMP_1 %bb.13
   ; LIMIT-NEXT: {{  $}}
-  ; LIMIT-NEXT: bb.11 (%ir-block.25):
+  ; LIMIT-NEXT: bb.11:
   ; LIMIT-NEXT:   successors: %bb.13(0x80000000)
   ; LIMIT-NEXT: {{  $}}
-  ; LIMIT-NEXT:   [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; LIMIT-NEXT:   [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
   ; LIMIT-NEXT:   [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags
   ; LIMIT-NEXT:   JMP_1 %bb.13
   ; LIMIT-NEXT: {{  $}}
-  ; LIMIT-NEXT: bb.12 (%ir-block.28):
+  ; LIMIT-NEXT: bb.12:
   ; LIMIT-NEXT:   successors: %bb.13(0x80000000)
   ; LIMIT-NEXT: {{  $}}
-  ; LIMIT-NEXT:   [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; LIMIT-NEXT:   [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
   ; LIMIT-NEXT:   [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags
   ; LIMIT-NEXT: {{  $}}
-  ; LIMIT-NEXT: bb.13 (%ir-block.31):
+  ; LIMIT-NEXT: bb.13:
   ; LIMIT-NEXT:   [[PHI1:%[0-9]+]]:gr32 = PHI [[SHR32ri7]], %bb.12, [[SHR32ri6]], %bb.11, [[SHR32ri5]], %bb.10, [[MOV32rm4]], %bb.9
   ; LIMIT-NEXT:   [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[PHI1]], [[PHI]], implicit-def dead $eflags
   ; LIMIT-NEXT:   $eax = COPY [[OR32rr]]
   ; LIMIT-NEXT:   RET 0, $eax
   ;
   ; NOLIMIT-LABEL: name: foo
-  ; NOLIMIT: bb.0 (%ir-block.2):
+  ; NOLIMIT: bb.0:
   ; NOLIMIT-NEXT:   successors: %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000), %bb.5(0x20000000)
   ; NOLIMIT-NEXT:   liveins: $rdi, $esi
   ; NOLIMIT-NEXT: {{  $}}
@@ -252,88 +106,87 @@ body:             |
   ; NOLIMIT-NEXT:   [[SHR32ri:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 1, implicit-def dead $eflags
   ; NOLIMIT-NEXT:   [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[SHR32ri]], 7, implicit-def dead $eflags
   ; NOLIMIT-NEXT:   [[SUBREG_TO_REG:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri]], %subreg.sub_32bit
-  ; NOLIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg :: (load (s64) from jump-table)
+  ; NOLIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg
   ; NOLIMIT-NEXT: {{  $}}
-  ; NOLIMIT-NEXT: bb.2 (%ir-block.5):
+  ; NOLIMIT-NEXT: bb.2:
   ; NOLIMIT-NEXT:   successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
   ; NOLIMIT-NEXT: {{  $}}
-  ; NOLIMIT-NEXT:   [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; NOLIMIT-NEXT:   [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
   ; NOLIMIT-NEXT:   [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
   ; NOLIMIT-NEXT:   [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri1]], 7, implicit-def dead $eflags
   ; NOLIMIT-NEXT:   [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri1]], %subreg.sub_32bit
-  ; NOLIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg :: (load (s64) from jump-table)
+  ; NOLIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg
   ; NOLIMIT-NEXT: {{  $}}
-  ; NOLIMIT-NEXT: bb.3 (%ir-block.7):
+  ; NOLIMIT-NEXT: bb.3:
   ; NOLIMIT-NEXT:   successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
   ; NOLIMIT-NEXT: {{  $}}
-  ; NOLIMIT-NEXT:   [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; NOLIMIT-NEXT:   [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
   ; NOLIMIT-NEXT:   [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags
   ; NOLIMIT-NEXT:   [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
   ; NOLIMIT-NEXT:   [[AND32ri2:%[0-9]+]]:gr32 = AND32ri [[SHR32ri3]], 7, implicit-def dead $eflags
   ; NOLIMIT-NEXT:   [[SUBREG_TO_REG2:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri2]], %subreg.sub_32bit
-  ; NOLIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG2]], %jump-table.1, $noreg :: (load (s64) from jump-table)
+  ; NOLIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG2]], %jump-table.1, $noreg
   ; NOLIMIT-NEXT: {{  $}}
-  ; NOLIMIT-NEXT: bb.4 (%ir-block.10):
+  ; NOLIMIT-NEXT: bb.4:
   ; NOLIMIT-NEXT:   successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
   ; NOLIMIT-NEXT: {{  $}}
-  ; NOLIMIT-NEXT:   [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; NOLIMIT-NEXT:   [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
   ; NOLIMIT-NEXT:   [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags
   ; NOLIMIT-NEXT:   [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
   ; NOLIMIT-NEXT:   [[AND32ri3:%[0-9]+]]:gr32 = AND32ri [[SHR32ri5]], 7, implicit-def dead $eflags
   ; NOLIMIT-NEXT:   [[SUBREG_TO_REG3:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri3]], %subreg.sub_32bit
-  ; NOLIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG3]], %jump-table.1, $noreg :: (load (s64) from jump-table)
+  ; NOLIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG3]], %jump-table.1, $noreg
   ; NOLIMIT-NEXT: {{  $}}
-  ; NOLIMIT-NEXT: bb.5 (%ir-block.13):
+  ; NOLIMIT-NEXT: bb.5:
   ; NOLIMIT-NEXT:   successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000)
   ; NOLIMIT-NEXT: {{  $}}
-  ; NOLIMIT-NEXT:   [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; NOLIMIT-NEXT:   [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
   ; NOLIMIT-NEXT:   [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags
   ; NOLIMIT-NEXT:   [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags
   ; NOLIMIT-NEXT:   [[AND32ri4:%[0-9]+]]:gr32 = AND32ri [[SHR32ri7]], 7, implicit-def dead $eflags
   ; NOLIMIT-NEXT:   [[SUBREG_TO_REG4:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri4]], %subreg.sub_32bit
-  ; NOLIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG4]], %jump-table.1, $noreg :: (load (s64) from jump-table)
+  ; NOLIMIT-NEXT:   JMP64m $noreg, 8, [[SUBREG_TO_REG4]], %jump-table.1, $noreg
   ; NOLIMIT-NEXT: {{  $}}
-  ; NOLIMIT-NEXT: bb.6.default.unreachable2:
+  ; NOLIMIT-NEXT: bb.6:
   ; NOLIMIT-NEXT:   successors:
   ; NOLIMIT-NEXT: {{  $}}
-  ; NOLIMIT-NEXT: bb.9 (%ir-block.20):
+  ; NOLIMIT-NEXT: bb.9:
   ; NOLIMIT-NEXT:   successors: %bb.13(0x80000000)
   ; NOLIMIT-NEXT: {{  $}}
   ; NOLIMIT-NEXT:   [[PHI:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
-  ; NOLIMIT-NEXT:   [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; NOLIMIT-NEXT:   [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
   ; NOLIMIT-NEXT:   JMP_1 %bb.13
   ; NOLIMIT-NEXT: {{  $}}
-  ; NOLIMIT-NEXT: bb.10 (%ir-block.22):
+  ; NOLIMIT-NEXT: bb.10:
   ; NOLIMIT-NEXT:   successors: %bb.13(0x80000000)
   ; NOLIMIT-NEXT: {{  $}}
   ; NOLIMIT-NEXT:   [[PHI1:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
-  ; NOLIMIT-NEXT:   [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; NOLIMIT-NEXT:   [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
   ; NOLIMIT-NEXT:   [[SHR32ri8:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags
   ; NOLIMIT-NEXT:   JMP_1 %bb.13
   ; NOLIMIT-NEXT: {{  $}}
-  ; NOLIMIT-NEXT: bb.11 (%ir-block.25):
+  ; NOLIMIT-NEXT: bb.11:
   ; NOLIMIT-NEXT:   successors: %bb.13(0x80000000)
   ; NOLIMIT-NEXT: {{  $}}
   ; NOLIMIT-NEXT:   [[PHI2:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
-  ; NOLIMIT-NEXT:   [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; NOLIMIT-NEXT:   [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
   ; NOLIMIT-NEXT:   [[SHR32ri9:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags
   ; NOLIMIT-NEXT:   JMP_1 %bb.13
   ; NOLIMIT-NEXT: {{  $}}
-  ; NOLIMIT-NEXT: bb.12 (%ir-block.28):
+  ; NOLIMIT-NEXT: bb.12:
   ; NOLIMIT-NEXT:   successors: %bb.13(0x80000000)
   ; NOLIMIT-NEXT: {{  $}}
   ; NOLIMIT-NEXT:   [[PHI3:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5
-  ; NOLIMIT-NEXT:   [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  ; NOLIMIT-NEXT:   [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg
   ; NOLIMIT-NEXT:   [[SHR32ri10:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags
   ; NOLIMIT-NEXT: {{  $}}
-  ; NOLIMIT-NEXT: bb.13 (%ir-block.31):
+  ; NOLIMIT-NEXT: bb.13:
   ; NOLIMIT-NEXT:   [[PHI4:%[0-9]+]]:gr32 = PHI [[PHI]], %bb.9, [[PHI1]], %bb.10, [[PHI2]], %bb.11, [[PHI3]], %bb.12
   ; NOLIMIT-NEXT:   [[PHI5:%[0-9]+]]:gr32 = PHI [[SHR32ri10]], %bb.12, [[SHR32ri9]], %bb.11, [[SHR32ri8]], %bb.10, [[MOV32rm4]], %bb.9
   ; NOLIMIT-NEXT:   [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[PHI5]], [[PHI4]], implicit-def dead $eflags
   ; NOLIMIT-NEXT:   $eax = COPY [[OR32rr]]
   ; NOLIMIT-NEXT:   RET 0, $eax
-  bb.0 (%ir-block.2):
-    successors: %bb.12(0x80000000)
+  bb.0:
     liveins: $rdi, $esi
 
     %11:gr32 = COPY $esi
@@ -342,83 +195,64 @@ body:             |
     %14:gr32 = AND32ri %13, 7, implicit-def dead $eflags
     %12:gr64_nosp = SUBREG_TO_REG 0, killed %14, %subreg.sub_32bit
 
-  bb.12 (%ir-block.2):
-    successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
-
-    JMP64m $noreg, 8, %12, %jump-table.0, $noreg :: (load (s64) from jump-table)
+  bb.1:
+    successors: %bb.2, %bb.3, %bb.4, %bb.5
 
-  bb.1 (%ir-block.5):
-    successors: %bb.6(0x80000000)
+    JMP64m $noreg, 8, %12, %jump-table.0, $noreg
 
-    %0:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
-    JMP_1 %bb.6
+  bb.2:
+    %0:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
+    JMP_1 %bb.7
 
-  bb.2 (%ir-block.7):
-    successors: %bb.6(0x80000000)
-
-    %17:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  bb.3:
+    %17:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
     %1:gr32 = SHR32ri %17, 1, implicit-def dead $eflags
-    JMP_1 %bb.6
-
-  bb.3 (%ir-block.10):
-    successors: %bb.6(0x80000000)
+    JMP_1 %bb.7
 
-    %16:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  bb.4:
+    %16:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
     %2:gr32 = SHR32ri %16, 2, implicit-def dead $eflags
-    JMP_1 %bb.6
-
-  bb.4 (%ir-block.13):
-    successors: %bb.6(0x80000000)
+    JMP_1 %bb.7
 
-    %15:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  bb.5:
+    %15:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
     %3:gr32 = SHR32ri %15, 3, implicit-def dead $eflags
-    JMP_1 %bb.6
+    JMP_1 %bb.7
 
-  bb.5.default.unreachable2:
+  bb.6:
     successors:
 
-
-  bb.6 (%ir-block.16):
-    successors: %bb.13(0x80000000)
-
-    %4:gr32 = PHI %3, %bb.4, %2, %bb.3, %1, %bb.2, %0, %bb.1
+  bb.7:
+    %4:gr32 = PHI %3, %bb.5, %2, %bb.4, %1, %bb.3, %0, %bb.2
     %19:gr32 = SHR32ri %11, 2, implicit-def dead $eflags
     %20:gr32 = AND32ri %19, 7, implicit-def dead $eflags
     %18:gr64_nosp = SUBREG_TO_REG 0, killed %20, %subreg.sub_32bit
 
-  bb.13 (%ir-block.16):
-    successors: %bb.7(0x20000000), %bb.8(0x20000000), %bb.9(0x20000000), %bb.10(0x20000000)
-
-    JMP64m $noreg, 8, %18, %jump-table.1, $noreg :: (load (s64) from jump-table)
+  bb.8:
+    successors: %bb.9, %bb.10, %bb.11, %bb.12
 
-  bb.7 (%ir-block.20):
-    successors: %bb.11(0x80000000)
+    JMP64m $noreg, 8, %18, %jump-table.1, $noreg
 
-    %5:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
-    JMP_1 %bb.11
+  bb.9:
+    %5:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
+    JMP_1 %bb.13
 
-  bb.8 (%ir-block.22):
-    successors: %bb.11(0x80000000)
-
-    %23:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  bb.10:
+    %23:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
     %6:gr32 = SHR32ri %23, 1, implicit-def dead $eflags
-    JMP_1 %bb.11
-
-  bb.9 (%ir-block.25):
-    successors: %bb.11(0x80000000)
+    JMP_1 %bb.13
 
-    %22:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  bb.11:
+    %22:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
     %7:gr32 = SHR32ri %22, 2, implicit-def dead $eflags
-    JMP_1 %bb.11
-
-  bb.10 (%ir-block.28):
-    successors: %bb.11(0x80000000)
+    JMP_1 %bb.13
 
-    %21:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg :: (load (s32) from %ir.0)
+  bb.12:
+    %21:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg
     %8:gr32 = SHR32ri %21, 6, implicit-def dead $eflags
 
-  bb.11 (%ir-block.31):
-    %9:gr32 = PHI %8, %bb.10, %7, %bb.9, %6, %bb.8, %5, %bb.7
+  bb.13:
+    %9:gr32 = PHI %8, %bb.12, %7, %bb.11, %6, %bb.10, %5, %bb.9
     %24:gr32 = OR32rr %9, %4, implicit-def dead $eflags
     $eax = COPY %24
     RET 0, $eax

>From dfc2e9e61225d16365979a516baa5fff68ea739f Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Sat, 24 Feb 2024 20:46:29 +0800
Subject: [PATCH 4/4] Add comments

---
 llvm/lib/CodeGen/TailDuplicator.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index c1e32fa7d63692..f5dd21cb927012 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -577,6 +577,10 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
   if (TailBB.isSuccessor(&TailBB))
     return false;
 
+  // Duplicating a BB which has both multiple predecessors and successors will
+  // result in a complex CFG and may also cause a huge number of PHI nodes. If
+  // we want to remove this limitation, we have to address
+  // https://github.com/llvm/llvm-project/issues/78578.
   if (TailBB.pred_size() > TailDupPredSize &&
       TailBB.succ_size() > TailDupSuccSize)
     return false;



More information about the llvm-commits mailing list