[llvm] 75e50f7 - Use update_llc_test_checks for the switch.ll test; add new test

Matthias Braun via llvm-commits llvm-commits@lists.llvm.org
Tue May 10 10:01:37 PDT 2022


Author: Matthias Braun
Date: 2022-05-10T10:00:10-07:00
New Revision: 75e50f70c6fd461cae915fc4983faf4127a6059f

URL: https://github.com/llvm/llvm-project/commit/75e50f70c6fd461cae915fc4983faf4127a6059f
DIFF: https://github.com/llvm/llvm-project/commit/75e50f70c6fd461cae915fc4983faf4127a6059f.diff

LOG: Use update_llc_test_checks for the switch.ll test; add new test

- Change `switch.ll` test to a style suitable for
  `utils/update_llc_test_checks.py`.
- Precommit test for upcoming changes:
  - Add `switch_i8` to `test/CodeGen/X86/switch.ll`.
  - Add `test/CodeGen/X86/switch-phi-const.ll`.

Differential Revision: https://reviews.llvm.org/D124893
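
For reference, assertions like the ones in this diff are generated rather than
written by hand. A typical regeneration step, assuming a local build with llc
in `build/bin` (adjust to your build directory), looks roughly like:

    llvm/utils/update_llc_test_checks.py --llc-binary build/bin/llc \
        llvm/test/CodeGen/X86/switch.ll llvm/test/CodeGen/X86/switch-phi-const.ll

The script runs each RUN line of the given tests and rewrites the CHECK/NOOPT
lines in place, so later codegen changes only require rerunning it instead of
updating the assertions manually.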

Added: 
    llvm/test/CodeGen/X86/switch-phi-const.ll

Modified: 
    llvm/test/CodeGen/X86/switch.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/switch-phi-const.ll b/llvm/test/CodeGen/X86/switch-phi-const.ll
new file mode 100644
index 0000000000000..d0ea18f524cfc
--- /dev/null
+++ b/llvm/test/CodeGen/X86/switch-phi-const.ll
@@ -0,0 +1,186 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc %s -o - -mtriple=x86_64-- | FileCheck %s
+@g = global i32 0
+@effect = global i32 0
+
+define void @switch_phi_const(i32 %x) {
+; CHECK-LABEL: switch_phi_const:
+; CHECK:       # %bb.0: # %bb0
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    decl %edi
+; CHECK-NEXT:    cmpl $54, %edi
+; CHECK-NEXT:    ja .LBB0_8
+; CHECK-NEXT:  # %bb.1: # %bb0
+; CHECK-NEXT:    movl $42, %eax
+; CHECK-NEXT:    movl $13, %edx
+; CHECK-NEXT:    movl $5, %esi
+; CHECK-NEXT:    movl $1, %ecx
+; CHECK-NEXT:    jmpq *.LJTI0_0(,%rdi,8)
+; CHECK-NEXT:  .LBB0_2: # %case_7
+; CHECK-NEXT:    movq g@GOTPCREL(%rip), %rax
+; CHECK-NEXT:    movl (%rax), %ecx
+; CHECK-NEXT:    movq effect@GOTPCREL(%rip), %rax
+; CHECK-NEXT:    movl $7, (%rax)
+; CHECK-NEXT:  .LBB0_3: # %case_1_loop
+; CHECK-NEXT:    movq effect@GOTPCREL(%rip), %rax
+; CHECK-NEXT:    movl $1, (%rax)
+; CHECK-NEXT:    movl %ecx, %esi
+; CHECK-NEXT:  .LBB0_4: # %case_5
+; CHECK-NEXT:    movq effect@GOTPCREL(%rip), %rax
+; CHECK-NEXT:    movl $5, (%rax)
+; CHECK-NEXT:    movl %esi, %edx
+; CHECK-NEXT:  .LBB0_5: # %case_13
+; CHECK-NEXT:    movq effect@GOTPCREL(%rip), %rax
+; CHECK-NEXT:    movl $13, (%rax)
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:  .LBB0_6: # %case_42
+; CHECK-NEXT:    movq effect@GOTPCREL(%rip), %rcx
+; CHECK-NEXT:    movl %eax, (%rcx)
+; CHECK-NEXT:    movl $55, %eax
+; CHECK-NEXT:  .LBB0_7: # %case_55
+; CHECK-NEXT:    movq effect@GOTPCREL(%rip), %rcx
+; CHECK-NEXT:    movl %eax, (%rcx)
+; CHECK-NEXT:  .LBB0_8: # %default
+; CHECK-NEXT:    retq
+bb0:
+  switch i32 %x, label %default [
+  i32 1, label %case_1_loop
+  i32 5, label %case_5
+  i32 7, label %case_7
+  i32 13, label %case_13
+  i32 42, label %case_42
+  i32 55, label %case_55
+  ]
+
+case_1_loop:
+  ; We should replace 1 with %x
+  %x0 = phi i32 [ 1, %bb0 ], [ %x5, %case_7 ]
+  store i32 1, i32* @effect, align 4
+  br label %case_5
+
+case_5:
+  ; We should replace 5 with %x
+  %x1 = phi i32 [ 5, %bb0 ], [ %x0, %case_1_loop ]
+  store i32 5, i32* @effect, align 4
+  br label %case_13
+
+case_13:
+  ; We should replace 13 with %x
+  %x2 = phi i32 [ 13, %bb0 ], [ %x1, %case_5 ]
+  store i32 13, i32* @effect, align 4
+  br label %case_42
+
+case_42:
+  ; We should replace 42 with %x
+  %x3 = phi i32 [ 42, %bb0 ], [ %x2, %case_13 ]
+  store i32 %x3, i32* @effect, align 4
+  br label %case_55
+
+case_55:
+  ; We must not replace any of the PHI arguments!
+  %x4 = phi i32 [ 42, %bb0 ], [ 55, %case_42 ]
+  store i32 %x4, i32* @effect, align 4
+  br label %default
+
+case_7:
+  %x5 = load i32, i32* @g, align 4
+  store i32 7, i32* @effect, align 4
+  br label %case_1_loop
+
+default:
+  ret void
+}
+
+@g64 = global i64 0
+@effect64 = global i64 0
+
+define void @switch_trunc_phi_const(i32 %x) {
+; CHECK-LABEL: switch_trunc_phi_const:
+; CHECK:       # %bb.0: # %bb0
+; CHECK-NEXT:    movl $3895, %r8d # imm = 0xF37
+; CHECK-NEXT:    movl $42, %esi
+; CHECK-NEXT:    movl $13, %edx
+; CHECK-NEXT:    movl $5, %eax
+; CHECK-NEXT:    movl $1, %ecx
+; CHECK-NEXT:    decb %dil
+; CHECK-NEXT:    movzbl %dil, %edi
+; CHECK-NEXT:    cmpb $54, %dil
+; CHECK-NEXT:    ja .LBB1_8
+; CHECK-NEXT:  # %bb.1: # %bb0
+; CHECK-NEXT:    jmpq *.LJTI1_0(,%rdi,8)
+; CHECK-NEXT:  .LBB1_8: # %default
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB1_2: # %case_1_loop
+; CHECK-NEXT:    movq effect64@GOTPCREL(%rip), %rax
+; CHECK-NEXT:    movq $1, (%rax)
+; CHECK-NEXT:    movq %rcx, %rax
+; CHECK-NEXT:  .LBB1_3: # %case_5
+; CHECK-NEXT:    movq effect64@GOTPCREL(%rip), %rcx
+; CHECK-NEXT:    movq $5, (%rcx)
+; CHECK-NEXT:    movq %rax, %rdx
+; CHECK-NEXT:  .LBB1_4: # %case_13
+; CHECK-NEXT:    movq effect64@GOTPCREL(%rip), %rax
+; CHECK-NEXT:    movq $13, (%rax)
+; CHECK-NEXT:    movq %rdx, %rsi
+; CHECK-NEXT:  .LBB1_5: # %case_42
+; CHECK-NEXT:    movq effect64@GOTPCREL(%rip), %rax
+; CHECK-NEXT:    movq %rsi, (%rax)
+; CHECK-NEXT:    movl $55, %r8d
+; CHECK-NEXT:  .LBB1_6: # %case_55
+; CHECK-NEXT:    movq effect64@GOTPCREL(%rip), %rax
+; CHECK-NEXT:    movq %r8, (%rax)
+; CHECK-NEXT:  .LBB1_7: # %case_7
+; CHECK-NEXT:    movq g64@GOTPCREL(%rip), %rax
+; CHECK-NEXT:    movq (%rax), %rcx
+; CHECK-NEXT:    movq effect64@GOTPCREL(%rip), %rax
+; CHECK-NEXT:    movq $7, (%rax)
+; CHECK-NEXT:    jmp .LBB1_2
+bb0:
+  %x_trunc = trunc i32 %x to i8
+  switch i8 %x_trunc, label %default [
+  i8 1, label %case_1_loop
+  i8 5, label %case_5
+  i8 7, label %case_7
+  i8 13, label %case_13
+  i8 42, label %case_42
+  i8 55, label %case_55
+  ]
+
+case_1_loop:
+  ; We should replace 1 with %x
+  %x0 = phi i64 [ 1, %bb0 ], [ %x5, %case_7 ]
+  store i64 1, i64* @effect64, align 4
+  br label %case_5
+
+case_5:
+  ; We should replace 5 with %x
+  %x1 = phi i64 [ 5, %bb0 ], [ %x0, %case_1_loop ]
+  store i64 5, i64* @effect64, align 4
+  br label %case_13
+
+case_13:
+  ; We should replace 13 with %x
+  %x2 = phi i64 [ 13, %bb0 ], [ %x1, %case_5 ]
+  store i64 13, i64* @effect64, align 4
+  br label %case_42
+
+case_42:
+  ; We should replace 42 with %x
+  %x3 = phi i64 [ 42, %bb0 ], [ %x2, %case_13 ]
+  store i64 %x3, i64* @effect64, align 4
+  br label %case_55
+
+case_55:
+  ; We must not replace any of the PHI arguments! (3895 == 0xf00 + 55)
+  %x4 = phi i64 [ 3895, %bb0 ], [ 55, %case_42 ]
+  store i64 %x4, i64* @effect64, align 4
+  br label %case_7
+
+case_7:
+  %x5 = load i64, i64* @g64, align 4
+  store i64 7, i64* @effect64, align 4
+  br label %case_1_loop
+
+default:
+  ret void
+}

diff  --git a/llvm/test/CodeGen/X86/switch.ll b/llvm/test/CodeGen/X86/switch.ll
index 348dc92a8f485..5c8718050d77f 100644
--- a/llvm/test/CodeGen/X86/switch.ll
+++ b/llvm/test/CodeGen/X86/switch.ll
@@ -1,9 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -jump-table-density=40 -switch-peel-threshold=101 -verify-machineinstrs | FileCheck %s
 ; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -O0 -jump-table-density=40 -verify-machineinstrs | FileCheck --check-prefix=NOOPT %s
 
 declare void @g(i32)
 
+; Should be lowered as a jump table, both with and without optimization.
 define void @basic(i32 %x) {
+; CHECK-LABEL: basic:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    decl %edi
+; CHECK-NEXT:    cmpl $4, %edi
+; CHECK-NEXT:    ja .LBB0_4
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    jmpq *.LJTI0_0(,%rdi,8)
+; CHECK-NEXT:  .LBB0_3: # %bb2
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB0_4: # %return
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB0_2: # %bb0
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+;
+; NOOPT-LABEL: basic:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    pushq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    movl %edi, %eax
+; NOOPT-NEXT:    decl %eax
+; NOOPT-NEXT:    movl %eax, %ecx
+; NOOPT-NEXT:    movq %rcx, (%rsp) # 8-byte Spill
+; NOOPT-NEXT:    subl $4, %eax
+; NOOPT-NEXT:    ja .LBB0_4
+; NOOPT-NEXT:  # %bb.5: # %entry
+; NOOPT-NEXT:    movq (%rsp), %rax # 8-byte Reload
+; NOOPT-NEXT:    movq .LJTI0_0(,%rax,8), %rax
+; NOOPT-NEXT:    jmpq *%rax
+; NOOPT-NEXT:  .LBB0_1: # %bb0
+; NOOPT-NEXT:    xorl %edi, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB0_4
+; NOOPT-NEXT:  .LBB0_2: # %bb1
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB0_4
+; NOOPT-NEXT:  .LBB0_3: # %bb2
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:  .LBB0_4: # %return
+; NOOPT-NEXT:    popq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:    retq
 entry:
   switch i32 %x, label %return [
     i32 3, label %bb0
@@ -15,23 +63,73 @@ bb0: tail call void @g(i32 0) br label %return
 bb1: tail call void @g(i32 1) br label %return
 bb2: tail call void @g(i32 1) br label %return
 return: ret void
-
-; Lowered as a jump table, both with and without optimization.
-; CHECK-LABEL: basic
-; CHECK: decl
-; CHECK: cmpl $4
-; CHECK: ja
-; CHECK: jmpq *.LJTI
-; NOOPT-LABEL: basic
-; NOOPT: decl
-; NOOPT: subl $4
-; NOOPT: ja
-; NOOPT: movq .LJTI
-; NOOPT: jmpq
 }
 
 ; Should never be lowered as a jump table because of the attribute
 define void @basic_nojumptable(i32 %x) "no-jump-tables"="true" {
+; CHECK-LABEL: basic_nojumptable:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl $3, %edi
+; CHECK-NEXT:    jg .LBB1_4
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    cmpl $1, %edi
+; CHECK-NEXT:    je .LBB1_7
+; CHECK-NEXT:  # %bb.2: # %entry
+; CHECK-NEXT:    cmpl $3, %edi
+; CHECK-NEXT:    jne .LBB1_6
+; CHECK-NEXT:  # %bb.3: # %bb0
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB1_4: # %entry
+; CHECK-NEXT:    cmpl $4, %edi
+; CHECK-NEXT:    je .LBB1_7
+; CHECK-NEXT:  # %bb.5: # %entry
+; CHECK-NEXT:    cmpl $5, %edi
+; CHECK-NEXT:    jne .LBB1_6
+; CHECK-NEXT:  .LBB1_7: # %bb2
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB1_6: # %return
+; CHECK-NEXT:    retq
+;
+; NOOPT-LABEL: basic_nojumptable:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    pushq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; NOOPT-NEXT:    subl $1, %edi
+; NOOPT-NEXT:    je .LBB1_2
+; NOOPT-NEXT:    jmp .LBB1_5
+; NOOPT-NEXT:  .LBB1_5: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $3, %eax
+; NOOPT-NEXT:    je .LBB1_1
+; NOOPT-NEXT:    jmp .LBB1_6
+; NOOPT-NEXT:  .LBB1_6: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $4, %eax
+; NOOPT-NEXT:    je .LBB1_2
+; NOOPT-NEXT:    jmp .LBB1_7
+; NOOPT-NEXT:  .LBB1_7: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $5, %eax
+; NOOPT-NEXT:    je .LBB1_3
+; NOOPT-NEXT:    jmp .LBB1_4
+; NOOPT-NEXT:  .LBB1_1: # %bb0
+; NOOPT-NEXT:    xorl %edi, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB1_4
+; NOOPT-NEXT:  .LBB1_2: # %bb1
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB1_4
+; NOOPT-NEXT:  .LBB1_3: # %bb2
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:  .LBB1_4: # %return
+; NOOPT-NEXT:    popq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:    retq
 entry:
   switch i32 %x, label %return [
     i32 3, label %bb0
@@ -43,14 +141,56 @@ bb0: tail call void @g(i32 0) br label %return
 bb1: tail call void @g(i32 1) br label %return
 bb2: tail call void @g(i32 1) br label %return
 return: ret void
-
-; Lowered as a jump table, both with and without optimization.
-; CHECK-LABEL: basic_nojumptable
-; CHECK-NOT: jmpq *.LJTI
 }
 
 ; Should be lowered as a jump table because of the attribute
 define void @basic_nojumptable_false(i32 %x) "no-jump-tables"="false" {
+; CHECK-LABEL: basic_nojumptable_false:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    decl %edi
+; CHECK-NEXT:    cmpl $4, %edi
+; CHECK-NEXT:    ja .LBB2_4
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    jmpq *.LJTI2_0(,%rdi,8)
+; CHECK-NEXT:  .LBB2_3: # %bb2
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB2_4: # %return
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB2_2: # %bb0
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+;
+; NOOPT-LABEL: basic_nojumptable_false:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    pushq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    movl %edi, %eax
+; NOOPT-NEXT:    decl %eax
+; NOOPT-NEXT:    movl %eax, %ecx
+; NOOPT-NEXT:    movq %rcx, (%rsp) # 8-byte Spill
+; NOOPT-NEXT:    subl $4, %eax
+; NOOPT-NEXT:    ja .LBB2_4
+; NOOPT-NEXT:  # %bb.5: # %entry
+; NOOPT-NEXT:    movq (%rsp), %rax # 8-byte Reload
+; NOOPT-NEXT:    movq .LJTI2_0(,%rax,8), %rax
+; NOOPT-NEXT:    jmpq *%rax
+; NOOPT-NEXT:  .LBB2_1: # %bb0
+; NOOPT-NEXT:    xorl %edi, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB2_4
+; NOOPT-NEXT:  .LBB2_2: # %bb1
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB2_4
+; NOOPT-NEXT:  .LBB2_3: # %bb2
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:  .LBB2_4: # %return
+; NOOPT-NEXT:    popq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:    retq
 entry:
   switch i32 %x, label %return [
     i32 3, label %bb0
@@ -62,17 +202,55 @@ bb0: tail call void @g(i32 0) br label %return
 bb1: tail call void @g(i32 1) br label %return
 bb2: tail call void @g(i32 1) br label %return
 return: ret void
-
-; Lowered as a jump table, both with and without optimization.
-; CHECK-LABEL: basic_nojumptable_false
-; CHECK: decl
-; CHECK: cmpl $4
-; CHECK: ja
-; CHECK: jmpq *.LJTI
 }
 
 
+; Should be lowered to two range checks.
+; We do this even at -O0, because it's cheap and makes codegen faster.
 define void @simple_ranges(i32 %x) {
+; CHECK-LABEL: simple_ranges:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal -100(%rdi), %eax
+; CHECK-NEXT:    cmpl $4, %eax
+; CHECK-NEXT:    jb .LBB3_3
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    cmpl $3, %edi
+; CHECK-NEXT:    ja .LBB3_4
+; CHECK-NEXT:  # %bb.2: # %bb0
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB3_3: # %bb1
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB3_4: # %return
+; CHECK-NEXT:    retq
+;
+; NOOPT-LABEL: simple_ranges:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    pushq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; NOOPT-NEXT:    subl $4, %edi
+; NOOPT-NEXT:    jb .LBB3_1
+; NOOPT-NEXT:    jmp .LBB3_4
+; NOOPT-NEXT:  .LBB3_4: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    addl $-100, %eax
+; NOOPT-NEXT:    subl $4, %eax
+; NOOPT-NEXT:    jb .LBB3_2
+; NOOPT-NEXT:    jmp .LBB3_3
+; NOOPT-NEXT:  .LBB3_1: # %bb0
+; NOOPT-NEXT:    xorl %edi, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB3_3
+; NOOPT-NEXT:  .LBB3_2: # %bb1
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:  .LBB3_3: # %return
+; NOOPT-NEXT:    popq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:    retq
 entry:
   switch i32 %x, label %return [
     i32 0, label %bb0
@@ -87,28 +265,73 @@ entry:
 bb0: tail call void @g(i32 0) br label %return
 bb1: tail call void @g(i32 1) br label %return
 return: ret void
-
-
-
-; Should be lowered to two range checks.
-; CHECK-LABEL: simple_ranges
-; CHECK: leal -100
-; CHECK: cmpl $4
-; CHECK: jb
-; CHECK: cmpl $3
-; CHECK: ja
-
-; We do this even at -O0, because it's cheap and makes codegen faster.
-; NOOPT-LABEL: simple_ranges
-; NOOPT: subl $4
-; NOOPT: jb
-; NOOPT: addl $-100
-; NOOPT: subl $4
-; NOOPT: jb
 }
 
 
+; Cases 0-5 could be lowered with two bit tests,
+; but with 6-8, the whole switch is suitable for a jump table.
 define void @jt_is_better(i32 %x) {
+; CHECK-LABEL: jt_is_better:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl $8, %edi
+; CHECK-NEXT:    ja .LBB4_7
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    jmpq *.LJTI4_0(,%rax,8)
+; CHECK-NEXT:  .LBB4_2: # %bb0
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB4_3: # %bb1
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB4_7: # %return
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB4_4: # %bb2
+; CHECK-NEXT:    movl $2, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB4_5: # %bb3
+; CHECK-NEXT:    movl $3, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB4_6: # %bb4
+; CHECK-NEXT:    movl $4, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+;
+; NOOPT-LABEL: jt_is_better:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    pushq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    movl %edi, %eax
+; NOOPT-NEXT:    # kill: def $rax killed $eax
+; NOOPT-NEXT:    movq %rax, (%rsp) # 8-byte Spill
+; NOOPT-NEXT:    subl $8, %edi
+; NOOPT-NEXT:    ja .LBB4_6
+; NOOPT-NEXT:  # %bb.7: # %entry
+; NOOPT-NEXT:    movq (%rsp), %rax # 8-byte Reload
+; NOOPT-NEXT:    movq .LJTI4_0(,%rax,8), %rax
+; NOOPT-NEXT:    jmpq *%rax
+; NOOPT-NEXT:  .LBB4_1: # %bb0
+; NOOPT-NEXT:    xorl %edi, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB4_6
+; NOOPT-NEXT:  .LBB4_2: # %bb1
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB4_6
+; NOOPT-NEXT:  .LBB4_3: # %bb2
+; NOOPT-NEXT:    movl $2, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB4_6
+; NOOPT-NEXT:  .LBB4_4: # %bb3
+; NOOPT-NEXT:    movl $3, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB4_6
+; NOOPT-NEXT:  .LBB4_5: # %bb4
+; NOOPT-NEXT:    movl $4, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:  .LBB4_6: # %return
+; NOOPT-NEXT:    popq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:    retq
 entry:
   switch i32 %x, label %return [
     i32 0, label %bb0
@@ -128,17 +351,101 @@ bb2: tail call void @g(i32 2) br label %return
 bb3: tail call void @g(i32 3) br label %return
 bb4: tail call void @g(i32 4) br label %return
 return: ret void
-
-; Cases 0-5 could be lowered with two bit tests,
-; but with 6-8, the whole switch is suitable for a jump table.
-; CHECK-LABEL: jt_is_better
-; CHECK: cmpl $8
-; CHECK: ja
-; CHECK: jmpq *.LJTI
 }
 
 
+; This could be lowered as a jump table, but bit tests is more efficient.
+; The bit test on 2,5,8 is unnecessary as all cases cover the range [0, 8].
+; The range check guarantees that cases other than 0,3,6 and 1,4,7 must be
+; in 2,5,8.
 define void @bt_is_better(i32 %x) {
+; CHECK-LABEL: bt_is_better:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl $8, %edi
+; CHECK-NEXT:    ja .LBB5_4
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl $73, %eax
+; CHECK-NEXT:    btl %edi, %eax
+; CHECK-NEXT:    jb .LBB5_5
+; CHECK-NEXT:  # %bb.2: # %entry
+; CHECK-NEXT:    movl $146, %eax
+; CHECK-NEXT:    btl %edi, %eax
+; CHECK-NEXT:    jae .LBB5_3
+; CHECK-NEXT:  # %bb.6: # %bb1
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB5_5: # %bb0
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB5_3: # %bb2
+; CHECK-NEXT:    movl $2, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB5_4: # %return
+; CHECK-NEXT:    retq
+;
+; NOOPT-LABEL: bt_is_better:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    pushq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; NOOPT-NEXT:    testl %edi, %edi
+; NOOPT-NEXT:    je .LBB5_1
+; NOOPT-NEXT:    jmp .LBB5_5
+; NOOPT-NEXT:  .LBB5_5: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $1, %eax
+; NOOPT-NEXT:    je .LBB5_2
+; NOOPT-NEXT:    jmp .LBB5_6
+; NOOPT-NEXT:  .LBB5_6: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $2, %eax
+; NOOPT-NEXT:    je .LBB5_3
+; NOOPT-NEXT:    jmp .LBB5_7
+; NOOPT-NEXT:  .LBB5_7: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $3, %eax
+; NOOPT-NEXT:    je .LBB5_1
+; NOOPT-NEXT:    jmp .LBB5_8
+; NOOPT-NEXT:  .LBB5_8: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $4, %eax
+; NOOPT-NEXT:    je .LBB5_2
+; NOOPT-NEXT:    jmp .LBB5_9
+; NOOPT-NEXT:  .LBB5_9: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $5, %eax
+; NOOPT-NEXT:    je .LBB5_3
+; NOOPT-NEXT:    jmp .LBB5_10
+; NOOPT-NEXT:  .LBB5_10: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $6, %eax
+; NOOPT-NEXT:    je .LBB5_1
+; NOOPT-NEXT:    jmp .LBB5_11
+; NOOPT-NEXT:  .LBB5_11: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $7, %eax
+; NOOPT-NEXT:    je .LBB5_2
+; NOOPT-NEXT:    jmp .LBB5_12
+; NOOPT-NEXT:  .LBB5_12: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $8, %eax
+; NOOPT-NEXT:    je .LBB5_3
+; NOOPT-NEXT:    jmp .LBB5_4
+; NOOPT-NEXT:  .LBB5_1: # %bb0
+; NOOPT-NEXT:    xorl %edi, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB5_4
+; NOOPT-NEXT:  .LBB5_2: # %bb1
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB5_4
+; NOOPT-NEXT:  .LBB5_3: # %bb2
+; NOOPT-NEXT:    movl $2, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:  .LBB5_4: # %return
+; NOOPT-NEXT:    popq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:    retq
 entry:
   switch i32 %x, label %return [
     i32 0, label %bb0
@@ -155,25 +462,98 @@ bb0: tail call void @g(i32 0) br label %return
 bb1: tail call void @g(i32 1) br label %return
 bb2: tail call void @g(i32 2) br label %return
 return: ret void
-
-; This could be lowered as a jump table, but bit tests is more efficient.
-; CHECK-LABEL: bt_is_better
-; The bit test on 2,5,8 is unnecessary as all cases cover the rage [0, 8].
-; The range check guarantees that cases other than 0,3,6 and 1,4,7 must be
-; in 2,5,8.
-;
-; 73 = 2^0 + 2^3 + 2^6
-; CHECK: movl $73
-; CHECK: btl
-; 146 = 2^1 + 2^4 + 2^7
-; CHECK: movl $146
-; CHECK: btl
-; 292 = 2^2 + 2^5 + 2^8
-; CHECK-NOT: movl $292
-; CHECK-NOT: btl
 }
 
+; This will also be lowered as bit test, but as the range [0,8] is not fully
+; covered (5 missing), the default statement can be jumped to and we end up
+; with one more branch.
 define void @bt_is_better2(i32 %x) {
+; CHECK-LABEL: bt_is_better2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl $8, %edi
+; CHECK-NEXT:    ja .LBB6_7
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl $73, %eax
+; CHECK-NEXT:    btl %edi, %eax
+; CHECK-NEXT:    jb .LBB6_5
+; CHECK-NEXT:  # %bb.2: # %entry
+; CHECK-NEXT:    movl $146, %eax
+; CHECK-NEXT:    btl %edi, %eax
+; CHECK-NEXT:    jae .LBB6_3
+; CHECK-NEXT:  # %bb.6: # %bb1
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB6_5: # %bb0
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB6_3: # %entry
+; CHECK-NEXT:    movl $260, %eax # imm = 0x104
+; CHECK-NEXT:    btl %edi, %eax
+; CHECK-NEXT:    jae .LBB6_7
+; CHECK-NEXT:  # %bb.4: # %bb2
+; CHECK-NEXT:    movl $2, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB6_7: # %return
+; CHECK-NEXT:    retq
+;
+; NOOPT-LABEL: bt_is_better2:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    pushq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; NOOPT-NEXT:    testl %edi, %edi
+; NOOPT-NEXT:    je .LBB6_1
+; NOOPT-NEXT:    jmp .LBB6_5
+; NOOPT-NEXT:  .LBB6_5: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $1, %eax
+; NOOPT-NEXT:    je .LBB6_2
+; NOOPT-NEXT:    jmp .LBB6_6
+; NOOPT-NEXT:  .LBB6_6: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $2, %eax
+; NOOPT-NEXT:    je .LBB6_3
+; NOOPT-NEXT:    jmp .LBB6_7
+; NOOPT-NEXT:  .LBB6_7: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $3, %eax
+; NOOPT-NEXT:    je .LBB6_1
+; NOOPT-NEXT:    jmp .LBB6_8
+; NOOPT-NEXT:  .LBB6_8: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $4, %eax
+; NOOPT-NEXT:    je .LBB6_2
+; NOOPT-NEXT:    jmp .LBB6_9
+; NOOPT-NEXT:  .LBB6_9: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $6, %eax
+; NOOPT-NEXT:    je .LBB6_1
+; NOOPT-NEXT:    jmp .LBB6_10
+; NOOPT-NEXT:  .LBB6_10: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $7, %eax
+; NOOPT-NEXT:    je .LBB6_2
+; NOOPT-NEXT:    jmp .LBB6_11
+; NOOPT-NEXT:  .LBB6_11: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $8, %eax
+; NOOPT-NEXT:    je .LBB6_3
+; NOOPT-NEXT:    jmp .LBB6_4
+; NOOPT-NEXT:  .LBB6_1: # %bb0
+; NOOPT-NEXT:    xorl %edi, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB6_4
+; NOOPT-NEXT:  .LBB6_2: # %bb1
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB6_4
+; NOOPT-NEXT:  .LBB6_3: # %bb2
+; NOOPT-NEXT:    movl $2, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:  .LBB6_4: # %return
+; NOOPT-NEXT:    popq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:    retq
 entry:
   switch i32 %x, label %return [
     i32 0, label %bb0
@@ -189,24 +569,95 @@ bb0: tail call void @g(i32 0) br label %return
 bb1: tail call void @g(i32 1) br label %return
 bb2: tail call void @g(i32 2) br label %return
 return: ret void
-
-; This will also be lowered as bit test, but as the range [0,8] is not fully
-; covered (5 missing), the default statement can be jumped to and we end up
-; with one more branch.
-; CHECK-LABEL: bt_is_better2
-;
-; 73 = 2^0 + 2^3 + 2^6
-; CHECK: movl $73
-; CHECK: btl
-; 146 = 2^1 + 2^4 + 2^7
-; CHECK: movl $146
-; CHECK: btl
-; 260 = 2^2 + 2^8
-; CHECK: movl $260
-; CHECK: btl
 }
 
 define void @bt_is_better3(i32 %x) {
+; CHECK-LABEL: bt_is_better3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl $18, %edi
+; CHECK-NEXT:    ja .LBB7_7
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl $74752, %eax # imm = 0x12400
+; CHECK-NEXT:    btl %edi, %eax
+; CHECK-NEXT:    jb .LBB7_5
+; CHECK-NEXT:  # %bb.2: # %entry
+; CHECK-NEXT:    movl $149504, %eax # imm = 0x24800
+; CHECK-NEXT:    btl %edi, %eax
+; CHECK-NEXT:    jae .LBB7_3
+; CHECK-NEXT:  # %bb.6: # %bb1
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB7_5: # %bb0
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB7_3: # %entry
+; CHECK-NEXT:    movl $266240, %eax # imm = 0x41000
+; CHECK-NEXT:    btl %edi, %eax
+; CHECK-NEXT:    jae .LBB7_7
+; CHECK-NEXT:  # %bb.4: # %bb2
+; CHECK-NEXT:    movl $2, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB7_7: # %return
+; CHECK-NEXT:    retq
+;
+; NOOPT-LABEL: bt_is_better3:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    pushq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; NOOPT-NEXT:    subl $10, %edi
+; NOOPT-NEXT:    je .LBB7_1
+; NOOPT-NEXT:    jmp .LBB7_5
+; NOOPT-NEXT:  .LBB7_5: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $11, %eax
+; NOOPT-NEXT:    je .LBB7_2
+; NOOPT-NEXT:    jmp .LBB7_6
+; NOOPT-NEXT:  .LBB7_6: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $12, %eax
+; NOOPT-NEXT:    je .LBB7_3
+; NOOPT-NEXT:    jmp .LBB7_7
+; NOOPT-NEXT:  .LBB7_7: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $13, %eax
+; NOOPT-NEXT:    je .LBB7_1
+; NOOPT-NEXT:    jmp .LBB7_8
+; NOOPT-NEXT:  .LBB7_8: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $14, %eax
+; NOOPT-NEXT:    je .LBB7_2
+; NOOPT-NEXT:    jmp .LBB7_9
+; NOOPT-NEXT:  .LBB7_9: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $16, %eax
+; NOOPT-NEXT:    je .LBB7_1
+; NOOPT-NEXT:    jmp .LBB7_10
+; NOOPT-NEXT:  .LBB7_10: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $17, %eax
+; NOOPT-NEXT:    je .LBB7_2
+; NOOPT-NEXT:    jmp .LBB7_11
+; NOOPT-NEXT:  .LBB7_11: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $18, %eax
+; NOOPT-NEXT:    je .LBB7_3
+; NOOPT-NEXT:    jmp .LBB7_4
+; NOOPT-NEXT:  .LBB7_1: # %bb0
+; NOOPT-NEXT:    xorl %edi, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB7_4
+; NOOPT-NEXT:  .LBB7_2: # %bb1
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB7_4
+; NOOPT-NEXT:  .LBB7_3: # %bb2
+; NOOPT-NEXT:    movl $2, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:  .LBB7_4: # %return
+; NOOPT-NEXT:    popq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:    retq
 entry:
   switch i32 %x, label %return [
     i32 10, label %bb0
@@ -227,21 +678,89 @@ return: ret void
 ; [0, 8], as each value in the range [10, 18] can be represented by bits in a
 ; word. Then we still need a branch to jump to the default statement for the
 ; range [0, 10).
-; CHECK-LABEL: bt_is_better3
-;
 ; 74752 = 2^10 + 2^13 + 2^16
-; CHECK: movl $74752
-; CHECK: btl
 ; 149504 = 2^11 + 2^14 + 2^17
-; CHECK: movl $149504
-; CHECK: btl
 ; 266240 = 2^12 + 2^15 + 2^18
-; CHECK: movl $266240
-; CHECK: btl
 }
 
 
+; Should pivot around 400 for two subtrees of equal size.
 define void @optimal_pivot1(i32 %x) {
+; CHECK-LABEL: optimal_pivot1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl $399, %edi # imm = 0x18F
+; CHECK-NEXT:    jg .LBB8_5
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    cmpl $100, %edi
+; CHECK-NEXT:    je .LBB8_8
+; CHECK-NEXT:  # %bb.2: # %entry
+; CHECK-NEXT:    cmpl $200, %edi
+; CHECK-NEXT:    je .LBB8_9
+; CHECK-NEXT:  # %bb.3: # %entry
+; CHECK-NEXT:    cmpl $300, %edi # imm = 0x12C
+; CHECK-NEXT:    je .LBB8_8
+; CHECK-NEXT:  .LBB8_4: # %return
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB8_5: # %entry
+; CHECK-NEXT:    cmpl $400, %edi # imm = 0x190
+; CHECK-NEXT:    je .LBB8_9
+; CHECK-NEXT:  # %bb.6: # %entry
+; CHECK-NEXT:    cmpl $600, %edi # imm = 0x258
+; CHECK-NEXT:    je .LBB8_9
+; CHECK-NEXT:  # %bb.7: # %entry
+; CHECK-NEXT:    cmpl $500, %edi # imm = 0x1F4
+; CHECK-NEXT:    jne .LBB8_4
+; CHECK-NEXT:  .LBB8_8: # %bb0
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB8_9: # %bb1
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+;
+; NOOPT-LABEL: optimal_pivot1:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    pushq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; NOOPT-NEXT:    subl $100, %edi
+; NOOPT-NEXT:    je .LBB8_1
+; NOOPT-NEXT:    jmp .LBB8_4
+; NOOPT-NEXT:  .LBB8_4: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $200, %eax
+; NOOPT-NEXT:    je .LBB8_2
+; NOOPT-NEXT:    jmp .LBB8_5
+; NOOPT-NEXT:  .LBB8_5: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $300, %eax # imm = 0x12C
+; NOOPT-NEXT:    je .LBB8_1
+; NOOPT-NEXT:    jmp .LBB8_6
+; NOOPT-NEXT:  .LBB8_6: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $400, %eax # imm = 0x190
+; NOOPT-NEXT:    je .LBB8_2
+; NOOPT-NEXT:    jmp .LBB8_7
+; NOOPT-NEXT:  .LBB8_7: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $500, %eax # imm = 0x1F4
+; NOOPT-NEXT:    je .LBB8_1
+; NOOPT-NEXT:    jmp .LBB8_8
+; NOOPT-NEXT:  .LBB8_8: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $600, %eax # imm = 0x258
+; NOOPT-NEXT:    je .LBB8_2
+; NOOPT-NEXT:    jmp .LBB8_3
+; NOOPT-NEXT:  .LBB8_1: # %bb0
+; NOOPT-NEXT:    xorl %edi, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB8_3
+; NOOPT-NEXT:  .LBB8_2: # %bb1
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:  .LBB8_3: # %return
+; NOOPT-NEXT:    popq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:    retq
 entry:
   switch i32 %x, label %return [
     i32 100, label %bb0
@@ -255,15 +774,157 @@ entry:
 bb0: tail call void @g(i32 0) br label %return
 bb1: tail call void @g(i32 1) br label %return
 return: ret void
-
-; Should pivot around 400 for two subtrees of equal size.
-; CHECK-LABEL: optimal_pivot1
-; CHECK-NOT: cmpl
-; CHECK: cmpl $399
 }
 
 
+; Should pivot around 300 for two subtrees with two jump tables each.
 define void @optimal_pivot2(i32 %x) {
+; CHECK-LABEL: optimal_pivot2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    cmpl $299, %edi # imm = 0x12B
+; CHECK-NEXT:    jg .LBB9_4
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    leal -100(%rdi), %eax
+; CHECK-NEXT:    cmpl $3, %eax
+; CHECK-NEXT:    jbe .LBB9_12
+; CHECK-NEXT:  # %bb.2: # %entry
+; CHECK-NEXT:    addl $-200, %edi
+; CHECK-NEXT:    cmpl $3, %edi
+; CHECK-NEXT:    ja .LBB9_11
+; CHECK-NEXT:  # %bb.3: # %entry
+; CHECK-NEXT:    jmpq *.LJTI9_1(,%rdi,8)
+; CHECK-NEXT:  .LBB9_4: # %entry
+; CHECK-NEXT:    leal -300(%rdi), %eax
+; CHECK-NEXT:    cmpl $3, %eax
+; CHECK-NEXT:    jbe .LBB9_13
+; CHECK-NEXT:  # %bb.5: # %entry
+; CHECK-NEXT:    addl $-400, %edi # imm = 0xFE70
+; CHECK-NEXT:    cmpl $3, %edi
+; CHECK-NEXT:    ja .LBB9_11
+; CHECK-NEXT:  # %bb.6: # %entry
+; CHECK-NEXT:    jmpq *.LJTI9_3(,%rdi,8)
+; CHECK-NEXT:  .LBB9_12: # %entry
+; CHECK-NEXT:    jmpq *.LJTI9_0(,%rax,8)
+; CHECK-NEXT:  .LBB9_13: # %entry
+; CHECK-NEXT:    jmpq *.LJTI9_2(,%rax,8)
+; CHECK-NEXT:  .LBB9_7: # %bb0
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB9_8: # %bb1
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB9_9: # %bb2
+; CHECK-NEXT:    movl $2, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB9_10: # %bb3
+; CHECK-NEXT:    movl $3, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB9_11: # %return
+; CHECK-NEXT:    retq
+;
+; NOOPT-LABEL: optimal_pivot2:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    pushq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; NOOPT-NEXT:    subl $100, %edi
+; NOOPT-NEXT:    je .LBB9_1
+; NOOPT-NEXT:    jmp .LBB9_6
+; NOOPT-NEXT:  .LBB9_6: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $101, %eax
+; NOOPT-NEXT:    je .LBB9_2
+; NOOPT-NEXT:    jmp .LBB9_7
+; NOOPT-NEXT:  .LBB9_7: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $102, %eax
+; NOOPT-NEXT:    je .LBB9_3
+; NOOPT-NEXT:    jmp .LBB9_8
+; NOOPT-NEXT:  .LBB9_8: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $103, %eax
+; NOOPT-NEXT:    je .LBB9_4
+; NOOPT-NEXT:    jmp .LBB9_9
+; NOOPT-NEXT:  .LBB9_9: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $200, %eax
+; NOOPT-NEXT:    je .LBB9_1
+; NOOPT-NEXT:    jmp .LBB9_10
+; NOOPT-NEXT:  .LBB9_10: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $201, %eax
+; NOOPT-NEXT:    je .LBB9_2
+; NOOPT-NEXT:    jmp .LBB9_11
+; NOOPT-NEXT:  .LBB9_11: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $202, %eax
+; NOOPT-NEXT:    je .LBB9_3
+; NOOPT-NEXT:    jmp .LBB9_12
+; NOOPT-NEXT:  .LBB9_12: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $203, %eax
+; NOOPT-NEXT:    je .LBB9_4
+; NOOPT-NEXT:    jmp .LBB9_13
+; NOOPT-NEXT:  .LBB9_13: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $300, %eax # imm = 0x12C
+; NOOPT-NEXT:    je .LBB9_1
+; NOOPT-NEXT:    jmp .LBB9_14
+; NOOPT-NEXT:  .LBB9_14: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $301, %eax # imm = 0x12D
+; NOOPT-NEXT:    je .LBB9_2
+; NOOPT-NEXT:    jmp .LBB9_15
+; NOOPT-NEXT:  .LBB9_15: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $302, %eax # imm = 0x12E
+; NOOPT-NEXT:    je .LBB9_3
+; NOOPT-NEXT:    jmp .LBB9_16
+; NOOPT-NEXT:  .LBB9_16: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $303, %eax # imm = 0x12F
+; NOOPT-NEXT:    je .LBB9_4
+; NOOPT-NEXT:    jmp .LBB9_17
+; NOOPT-NEXT:  .LBB9_17: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $400, %eax # imm = 0x190
+; NOOPT-NEXT:    je .LBB9_1
+; NOOPT-NEXT:    jmp .LBB9_18
+; NOOPT-NEXT:  .LBB9_18: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $401, %eax # imm = 0x191
+; NOOPT-NEXT:    je .LBB9_2
+; NOOPT-NEXT:    jmp .LBB9_19
+; NOOPT-NEXT:  .LBB9_19: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $402, %eax # imm = 0x192
+; NOOPT-NEXT:    je .LBB9_3
+; NOOPT-NEXT:    jmp .LBB9_20
+; NOOPT-NEXT:  .LBB9_20: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $403, %eax # imm = 0x193
+; NOOPT-NEXT:    je .LBB9_4
+; NOOPT-NEXT:    jmp .LBB9_5
+; NOOPT-NEXT:  .LBB9_1: # %bb0
+; NOOPT-NEXT:    xorl %edi, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB9_5
+; NOOPT-NEXT:  .LBB9_2: # %bb1
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB9_5
+; NOOPT-NEXT:  .LBB9_3: # %bb2
+; NOOPT-NEXT:    movl $2, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB9_5
+; NOOPT-NEXT:  .LBB9_4: # %bb3
+; NOOPT-NEXT:    movl $3, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:  .LBB9_5: # %return
+; NOOPT-NEXT:    popq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:    retq
 entry:
   switch i32 %x, label %return [
     i32 100, label %bb0   i32 101, label %bb1   i32 102, label %bb2   i32 103, label %bb3
@@ -277,19 +938,105 @@ bb1: tail call void @g(i32 1) br label %return
 bb2: tail call void @g(i32 2) br label %return
 bb3: tail call void @g(i32 3) br label %return
 return: ret void
-
-; Should pivot around 300 for two subtrees with two jump tables each.
-; CHECK-LABEL: optimal_pivot2
-; CHECK-NOT: cmpl
-; CHECK: cmpl $299
-; CHECK: jmpq *.LJTI
-; CHECK: jmpq *.LJTI
-; CHECK: jmpq *.LJTI
-; CHECK: jmpq *.LJTI
 }
 
 
+; Splitting in the largest gap (between 6 and 12) would yield suboptimal result.
+; Expecting a jump table from 5 to 15.
+; At -O0, we don't build jump tables for only parts of a switch.
 define void @optimal_jump_table1(i32 %x) {
+; CHECK-LABEL: optimal_jump_table1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal -5(%rdi), %eax
+; CHECK-NEXT:    cmpl $10, %eax
+; CHECK-NEXT:    ja .LBB10_1
+; CHECK-NEXT:  # %bb.9: # %entry
+; CHECK-NEXT:    jmpq *.LJTI10_0(,%rax,8)
+; CHECK-NEXT:  .LBB10_3: # %bb1
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB10_1: # %entry
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    jne .LBB10_8
+; CHECK-NEXT:  # %bb.2: # %bb0
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB10_8: # %return
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB10_4: # %bb2
+; CHECK-NEXT:    movl $2, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB10_5: # %bb3
+; CHECK-NEXT:    movl $3, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB10_6: # %bb4
+; CHECK-NEXT:    movl $4, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB10_7: # %bb5
+; CHECK-NEXT:    movl $5, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+;
+; NOOPT-LABEL: optimal_jump_table1:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    pushq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; NOOPT-NEXT:    testl %edi, %edi
+; NOOPT-NEXT:    je .LBB10_1
+; NOOPT-NEXT:    jmp .LBB10_8
+; NOOPT-NEXT:  .LBB10_8: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $5, %eax
+; NOOPT-NEXT:    je .LBB10_2
+; NOOPT-NEXT:    jmp .LBB10_9
+; NOOPT-NEXT:  .LBB10_9: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $6, %eax
+; NOOPT-NEXT:    je .LBB10_3
+; NOOPT-NEXT:    jmp .LBB10_10
+; NOOPT-NEXT:  .LBB10_10: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $12, %eax
+; NOOPT-NEXT:    je .LBB10_4
+; NOOPT-NEXT:    jmp .LBB10_11
+; NOOPT-NEXT:  .LBB10_11: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $13, %eax
+; NOOPT-NEXT:    je .LBB10_5
+; NOOPT-NEXT:    jmp .LBB10_12
+; NOOPT-NEXT:  .LBB10_12: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $15, %eax
+; NOOPT-NEXT:    je .LBB10_6
+; NOOPT-NEXT:    jmp .LBB10_7
+; NOOPT-NEXT:  .LBB10_1: # %bb0
+; NOOPT-NEXT:    xorl %edi, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB10_7
+; NOOPT-NEXT:  .LBB10_2: # %bb1
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB10_7
+; NOOPT-NEXT:  .LBB10_3: # %bb2
+; NOOPT-NEXT:    movl $2, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB10_7
+; NOOPT-NEXT:  .LBB10_4: # %bb3
+; NOOPT-NEXT:    movl $3, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB10_7
+; NOOPT-NEXT:  .LBB10_5: # %bb4
+; NOOPT-NEXT:    movl $4, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB10_7
+; NOOPT-NEXT:  .LBB10_6: # %bb5
+; NOOPT-NEXT:    movl $5, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:  .LBB10_7: # %return
+; NOOPT-NEXT:    popq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:    retq
 entry:
   switch i32 %x, label %return [
     i32 0,  label %bb0
@@ -306,32 +1053,107 @@ bb3: tail call void @g(i32 3) br label %return
 bb4: tail call void @g(i32 4) br label %return
 bb5: tail call void @g(i32 5) br label %return
 return: ret void
-
-; Splitting in the largest gap (between 6 and 12) would yield suboptimal result.
-; Expecting a jump table from 5 to 15.
-; CHECK-LABEL: optimal_jump_table1
-; CHECK: leal -5
-; CHECK: cmpl $10
-; CHECK: jmpq *.LJTI
-
-; At -O0, we don't build jump tables for only parts of a switch.
-; NOOPT-LABEL: optimal_jump_table1
-; NOOPT: testl %edi, %edi
-; NOOPT: je
-; NOOPT: subl $5, [[REG:%e[abcd][xi]]]
-; NOOPT: je
-; NOOPT: subl $6, [[REG]]
-; NOOPT: je
-; NOOPT: subl $12, [[REG]]
-; NOOPT: je
-; NOOPT: subl $13, [[REG]]
-; NOOPT: je
-; NOOPT: subl $15, [[REG]]
-; NOOPT: je
 }
 
 
+; Partitioning the cases to the minimum number of dense sets is not good enough.
+; This can be partitioned as {0,1,2,9},{14,15} or {0,1,2},{9,14,15}. The former
+; should be preferred. Expecting a table from 0-9.
 define void @optimal_jump_table2(i32 %x) {
+; CHECK-LABEL: optimal_jump_table2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl $9, %edi
+; CHECK-NEXT:    ja .LBB11_1
+; CHECK-NEXT:  # %bb.10: # %entry
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    jmpq *.LJTI11_0(,%rax,8)
+; CHECK-NEXT:  .LBB11_4: # %bb0
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB11_1: # %entry
+; CHECK-NEXT:    cmpl $14, %edi
+; CHECK-NEXT:    je .LBB11_8
+; CHECK-NEXT:  # %bb.2: # %entry
+; CHECK-NEXT:    cmpl $15, %edi
+; CHECK-NEXT:    jne .LBB11_9
+; CHECK-NEXT:  # %bb.3: # %bb5
+; CHECK-NEXT:    movl $5, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB11_9: # %return
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB11_5: # %bb1
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB11_6: # %bb2
+; CHECK-NEXT:    movl $2, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB11_7: # %bb3
+; CHECK-NEXT:    movl $3, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB11_8: # %bb4
+; CHECK-NEXT:    movl $4, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+;
+; NOOPT-LABEL: optimal_jump_table2:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    pushq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; NOOPT-NEXT:    testl %edi, %edi
+; NOOPT-NEXT:    je .LBB11_1
+; NOOPT-NEXT:    jmp .LBB11_8
+; NOOPT-NEXT:  .LBB11_8: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $1, %eax
+; NOOPT-NEXT:    je .LBB11_2
+; NOOPT-NEXT:    jmp .LBB11_9
+; NOOPT-NEXT:  .LBB11_9: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $2, %eax
+; NOOPT-NEXT:    je .LBB11_3
+; NOOPT-NEXT:    jmp .LBB11_10
+; NOOPT-NEXT:  .LBB11_10: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $9, %eax
+; NOOPT-NEXT:    je .LBB11_4
+; NOOPT-NEXT:    jmp .LBB11_11
+; NOOPT-NEXT:  .LBB11_11: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $14, %eax
+; NOOPT-NEXT:    je .LBB11_5
+; NOOPT-NEXT:    jmp .LBB11_12
+; NOOPT-NEXT:  .LBB11_12: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $15, %eax
+; NOOPT-NEXT:    je .LBB11_6
+; NOOPT-NEXT:    jmp .LBB11_7
+; NOOPT-NEXT:  .LBB11_1: # %bb0
+; NOOPT-NEXT:    xorl %edi, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB11_7
+; NOOPT-NEXT:  .LBB11_2: # %bb1
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB11_7
+; NOOPT-NEXT:  .LBB11_3: # %bb2
+; NOOPT-NEXT:    movl $2, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB11_7
+; NOOPT-NEXT:  .LBB11_4: # %bb3
+; NOOPT-NEXT:    movl $3, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB11_7
+; NOOPT-NEXT:  .LBB11_5: # %bb4
+; NOOPT-NEXT:    movl $4, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB11_7
+; NOOPT-NEXT:  .LBB11_6: # %bb5
+; NOOPT-NEXT:    movl $5, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:  .LBB11_7: # %return
+; NOOPT-NEXT:    popq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:    retq
 entry:
   switch i32 %x, label %return [
     i32 0,  label %bb0
@@ -348,17 +1170,113 @@ bb3: tail call void @g(i32 3) br label %return
 bb4: tail call void @g(i32 4) br label %return
 bb5: tail call void @g(i32 5) br label %return
 return: ret void
-
-; Partitioning the cases to the minimum number of dense sets is not good enough.
-; This can be partitioned as {0,1,2,9},{14,15} or {0,1,2},{9,14,15}. The former
-; should be preferred. Expecting a table from 0-9.
-; CHECK-LABEL: optimal_jump_table2
-; CHECK: cmpl $9
-; CHECK: jmpq *.LJTI
 }
 
 
+; Splitting to maximize left-right density sum and gap size would split this
+; between 3 and 10, and then between 20 and 25. It's better to build a table
+; from 1-20.
 define void @optimal_jump_table3(i32 %x) {
+; CHECK-LABEL: optimal_jump_table3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal -1(%rdi), %eax
+; CHECK-NEXT:    cmpl $19, %eax
+; CHECK-NEXT:    ja .LBB12_1
+; CHECK-NEXT:  # %bb.3: # %entry
+; CHECK-NEXT:    jmpq *.LJTI12_0(,%rax,8)
+; CHECK-NEXT:  .LBB12_4: # %bb0
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB12_5: # %bb1
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB12_6: # %bb2
+; CHECK-NEXT:    movl $2, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB12_7: # %bb3
+; CHECK-NEXT:    movl $3, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB12_1: # %entry
+; CHECK-NEXT:    cmpl $25, %edi
+; CHECK-NEXT:    jne .LBB12_8
+; CHECK-NEXT:  # %bb.2: # %bb4
+; CHECK-NEXT:    movl $4, %edi
+; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB12_8: # %return
+; CHECK-NEXT:    retq
+;
+; NOOPT-LABEL: optimal_jump_table3:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    pushq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; NOOPT-NEXT:    subl $1, %edi
+; NOOPT-NEXT:    je .LBB12_1
+; NOOPT-NEXT:    jmp .LBB12_7
+; NOOPT-NEXT:  .LBB12_7: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $2, %eax
+; NOOPT-NEXT:    je .LBB12_2
+; NOOPT-NEXT:    jmp .LBB12_8
+; NOOPT-NEXT:  .LBB12_8: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $3, %eax
+; NOOPT-NEXT:    je .LBB12_3
+; NOOPT-NEXT:    jmp .LBB12_9
+; NOOPT-NEXT:  .LBB12_9: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $10, %eax
+; NOOPT-NEXT:    je .LBB12_4
+; NOOPT-NEXT:    jmp .LBB12_10
+; NOOPT-NEXT:  .LBB12_10: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $13, %eax
+; NOOPT-NEXT:    je .LBB12_1
+; NOOPT-NEXT:    jmp .LBB12_11
+; NOOPT-NEXT:  .LBB12_11: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $14, %eax
+; NOOPT-NEXT:    je .LBB12_2
+; NOOPT-NEXT:    jmp .LBB12_12
+; NOOPT-NEXT:  .LBB12_12: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $15, %eax
+; NOOPT-NEXT:    je .LBB12_3
+; NOOPT-NEXT:    jmp .LBB12_13
+; NOOPT-NEXT:  .LBB12_13: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $20, %eax
+; NOOPT-NEXT:    je .LBB12_4
+; NOOPT-NEXT:    jmp .LBB12_14
+; NOOPT-NEXT:  .LBB12_14: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $25, %eax
+; NOOPT-NEXT:    je .LBB12_5
+; NOOPT-NEXT:    jmp .LBB12_6
+; NOOPT-NEXT:  .LBB12_1: # %bb0
+; NOOPT-NEXT:    xorl %edi, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB12_6
+; NOOPT-NEXT:  .LBB12_2: # %bb1
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB12_6
+; NOOPT-NEXT:  .LBB12_3: # %bb2
+; NOOPT-NEXT:    movl $2, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB12_6
+; NOOPT-NEXT:  .LBB12_4: # %bb3
+; NOOPT-NEXT:    movl $3, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:    jmp .LBB12_6
+; NOOPT-NEXT:  .LBB12_5: # %bb4
+; NOOPT-NEXT:    movl $4, %edi
+; NOOPT-NEXT:    callq g@PLT
+; NOOPT-NEXT:  .LBB12_6: # %return
+; NOOPT-NEXT:    popq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:    retq
 entry:
   switch i32 %x, label %return [
     i32 1,  label %bb0
@@ -377,18 +1295,75 @@ bb2: tail call void @g(i32 2) br label %return
 bb3: tail call void @g(i32 3) br label %return
 bb4: tail call void @g(i32 4) br label %return
 return: ret void
-
-; Splitting to maximize left-right density sum and gap size would split this
-; between 3 and 10, and then between 20 and 25. It's better to build a table
-; from 1-20.
-; CHECK-LABEL: optimal_jump_table3
-; CHECK: leal -1
-; CHECK: cmpl $19
-; CHECK: jmpq *.LJTI
 }
 
 %struct.S = type { %struct.S*, i32 }
+
+; This will be lowered to a comparison with 4 and then bit tests. Make sure
+; that the phi node in %header gets a value from the comparison block.
 define void @phi_node_trouble(%struct.S* %s) {
+; CHECK-LABEL: phi_node_trouble:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB13_1: # %header
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    testq %rdi, %rdi
+; CHECK-NEXT:    je .LBB13_5
+; CHECK-NEXT:  # %bb.2: # %loop
+; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
+; CHECK-NEXT:    movq (%rdi), %rdi
+; CHECK-NEXT:    movl 8(%rdi), %eax
+; CHECK-NEXT:    cmpl $4, %eax
+; CHECK-NEXT:    je .LBB13_1
+; CHECK-NEXT:  # %bb.3: # %loop
+; CHECK-NEXT:    addl $-25, %eax
+; CHECK-NEXT:    cmpl $44, %eax
+; CHECK-NEXT:    ja .LBB13_5
+; CHECK-NEXT:  # %bb.4: # %loop
+; CHECK-NEXT:    movabsq $17592186046465, %rcx # imm = 0x100000000801
+; CHECK-NEXT:    btq %rax, %rcx
+; CHECK-NEXT:  .LBB13_5: # %exit2
+; CHECK-NEXT:    retq
+;
+; NOOPT-LABEL: phi_node_trouble:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NOOPT-NEXT:    jmp .LBB13_1
+; NOOPT-NEXT:  .LBB13_1: # %header
+; NOOPT-NEXT:    # =>This Inner Loop Header: Depth=1
+; NOOPT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; NOOPT-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NOOPT-NEXT:    cmpq $0, %rax
+; NOOPT-NEXT:    je .LBB13_3
+; NOOPT-NEXT:  # %bb.2: # %loop
+; NOOPT-NEXT:    # in Loop: Header=BB13_1 Depth=1
+; NOOPT-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; NOOPT-NEXT:    movq (%rax), %rax
+; NOOPT-NEXT:    movl 8(%rax), %ecx
+; NOOPT-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; NOOPT-NEXT:    subl $4, %ecx
+; NOOPT-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NOOPT-NEXT:    je .LBB13_1
+; NOOPT-NEXT:    jmp .LBB13_5
+; NOOPT-NEXT:  .LBB13_5: # %loop
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $25, %eax
+; NOOPT-NEXT:    je .LBB13_4
+; NOOPT-NEXT:    jmp .LBB13_6
+; NOOPT-NEXT:  .LBB13_6: # %loop
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $36, %eax
+; NOOPT-NEXT:    je .LBB13_4
+; NOOPT-NEXT:    jmp .LBB13_7
+; NOOPT-NEXT:  .LBB13_7: # %loop
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $69, %eax
+; NOOPT-NEXT:    je .LBB13_4
+; NOOPT-NEXT:    jmp .LBB13_3
+; NOOPT-NEXT:  .LBB13_3: # %exit
+; NOOPT-NEXT:    retq
+; NOOPT-NEXT:  .LBB13_4: # %exit2
+; NOOPT-NEXT:    retq
 entry:
   br label %header
 header:
@@ -410,17 +1385,23 @@ exit:
   ret void
 exit2:
   ret void
-
-; This will be lowered to a comparison with 4 and then bit tests. Make sure
-; that the phi node in %header gets a value from the comparison block.
-; CHECK-LABEL: phi_node_trouble
-; CHECK: movq (%[[REG1:[a-z]+]]), %[[REG1]]
-; CHECK: movl 8(%[[REG1]]), %[[REG2:[a-z]+]]
-; CHECK: cmpl $4, %[[REG2]]
 }
 
 
+; Branch directly to the default.
+; (In optimized builds the switch is removed earlier.)
 define void @default_only(i32 %x) {
+; CHECK-LABEL: default_only:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    retq
+;
+; NOOPT-LABEL: default_only:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    jmp .LBB14_2
+; NOOPT-NEXT:  .LBB14_1: # %return
+; NOOPT-NEXT:    retq
+; NOOPT-NEXT:  .LBB14_2: # %sw
+; NOOPT-NEXT:    jmp .LBB14_1
 entry:
   br label %sw
 return:
@@ -428,17 +1409,63 @@ return:
 sw:
   switch i32 %x, label %return [
   ]
-
-; Branch directly to the default.
-; (In optimized builds the switch is removed earlier.)
-; NOOPT-LABEL: default_only
-; NOOPT: .LBB[[L:[A-Z0-9_]+]]:
-; NOOPT-NEXT: retq
-; NOOPT: jmp .LBB[[L]]
 }
 
 
+; Don't infloop on jump tables where the upper bound is the max value of the
+; input type (in this case 127).
 define void @int_max_table_cluster(i8 %x) {
+; CHECK-LABEL: int_max_table_cluster:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addb $64, %dil
+; CHECK-NEXT:    cmpb $-65, %dil
+; CHECK-NEXT:    ja .LBB15_4
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    jmpq *.LJTI15_0(,%rax,8)
+; CHECK-NEXT:  .LBB15_2: # %bb0
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB15_3: # %bb3
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB15_4: # %return
+; CHECK-NEXT:    retq
+;
+; NOOPT-LABEL: int_max_table_cluster:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    pushq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    movb %dil, %al
+; NOOPT-NEXT:    addb $64, %al
+; NOOPT-NEXT:    movzbl %al, %ecx
+; NOOPT-NEXT:    # kill: def $rcx killed $ecx
+; NOOPT-NEXT:    movq %rcx, (%rsp) # 8-byte Spill
+; NOOPT-NEXT:    subb $-65, %al
+; NOOPT-NEXT:    ja .LBB15_5
+; NOOPT-NEXT:  # %bb.6: # %entry
+; NOOPT-NEXT:    movq (%rsp), %rax # 8-byte Reload
+; NOOPT-NEXT:    movq .LJTI15_0(,%rax,8), %rax
+; NOOPT-NEXT:    jmpq *%rax
+; NOOPT-NEXT:  .LBB15_1: # %bb0
+; NOOPT-NEXT:    xorl %edi, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB15_5
+; NOOPT-NEXT:  .LBB15_2: # %bb1
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB15_5
+; NOOPT-NEXT:  .LBB15_3: # %bb2
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB15_5
+; NOOPT-NEXT:  .LBB15_4: # %bb3
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:  .LBB15_5: # %return
+; NOOPT-NEXT:    popq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:    retq
 entry:
   switch i8 %x, label %return [
     i8 0, label %bb0 i8 1, label %bb0 i8 2, label %bb0 i8 3, label %bb0
@@ -493,15 +1520,103 @@ bb1: tail call void @g(i32 1) br label %return
 bb2: tail call void @g(i32 1) br label %return
 bb3: tail call void @g(i32 1) br label %return
 return: ret void
-
-; Don't infloop on jump tables where the upper bound is the max value of the
-; input type (in this case 127).
-; CHECK-LABEL: int_max_table_cluster
-; CHECK: jmpq *.LJTI
 }
 
 
+; Cases 1,4,7 have a very large branch weight (which shouldn't overflow), so
+; their bit test should come first. 0,3,6 and 2,5,8,9 both have a weight of 20,
+; but the latter set has more cases, so should be tested for earlier. The bit
+; test on 0,3,6 is unnecessary as all cases cover the range [0, 9]. The range
+; check guarantees that cases other than 1,4,7 and 2,5,8,9 must be in 0,3,6.
 define void @bt_order_by_weight(i32 %x) {
+; CHECK-LABEL: bt_order_by_weight:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl $9, %edi
+; CHECK-NEXT:    ja .LBB16_6
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl $146, %eax
+; CHECK-NEXT:    btl %edi, %eax
+; CHECK-NEXT:    jae .LBB16_2
+; CHECK-NEXT:  # %bb.4: # %bb1
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB16_2: # %entry
+; CHECK-NEXT:    movl $804, %eax # imm = 0x324
+; CHECK-NEXT:    btl %edi, %eax
+; CHECK-NEXT:    jae .LBB16_3
+; CHECK-NEXT:  # %bb.5: # %bb2
+; CHECK-NEXT:    movl $2, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB16_3: # %bb0
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB16_6: # %return
+; CHECK-NEXT:    retq
+;
+; NOOPT-LABEL: bt_order_by_weight:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    pushq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; NOOPT-NEXT:    testl %edi, %edi
+; NOOPT-NEXT:    je .LBB16_1
+; NOOPT-NEXT:    jmp .LBB16_5
+; NOOPT-NEXT:  .LBB16_5: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $1, %eax
+; NOOPT-NEXT:    je .LBB16_2
+; NOOPT-NEXT:    jmp .LBB16_6
+; NOOPT-NEXT:  .LBB16_6: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $2, %eax
+; NOOPT-NEXT:    je .LBB16_3
+; NOOPT-NEXT:    jmp .LBB16_7
+; NOOPT-NEXT:  .LBB16_7: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $3, %eax
+; NOOPT-NEXT:    je .LBB16_1
+; NOOPT-NEXT:    jmp .LBB16_8
+; NOOPT-NEXT:  .LBB16_8: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $4, %eax
+; NOOPT-NEXT:    je .LBB16_2
+; NOOPT-NEXT:    jmp .LBB16_9
+; NOOPT-NEXT:  .LBB16_9: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $5, %eax
+; NOOPT-NEXT:    je .LBB16_3
+; NOOPT-NEXT:    jmp .LBB16_10
+; NOOPT-NEXT:  .LBB16_10: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $6, %eax
+; NOOPT-NEXT:    je .LBB16_1
+; NOOPT-NEXT:    jmp .LBB16_11
+; NOOPT-NEXT:  .LBB16_11: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $7, %eax
+; NOOPT-NEXT:    je .LBB16_2
+; NOOPT-NEXT:    jmp .LBB16_12
+; NOOPT-NEXT:  .LBB16_12: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    addl $-8, %eax
+; NOOPT-NEXT:    subl $2, %eax
+; NOOPT-NEXT:    jb .LBB16_3
+; NOOPT-NEXT:    jmp .LBB16_4
+; NOOPT-NEXT:  .LBB16_1: # %bb0
+; NOOPT-NEXT:    xorl %edi, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB16_4
+; NOOPT-NEXT:  .LBB16_2: # %bb1
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB16_4
+; NOOPT-NEXT:  .LBB16_3: # %bb2
+; NOOPT-NEXT:    movl $2, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:  .LBB16_4: # %return
+; NOOPT-NEXT:    popq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:    retq
 entry:
   switch i32 %x, label %return [
     i32 0, label %bb0
@@ -519,23 +1634,6 @@ bb0: tail call void @g(i32 0) br label %return
 bb1: tail call void @g(i32 1) br label %return
 bb2: tail call void @g(i32 2) br label %return
 return: ret void
-
-; Cases 1,4,7 have a very large branch weight (which shouldn't overflow), so
-; their bit test should come first. 0,3,6 and 2,5,8,9 both have a weight of 20,
-; but the latter set has more cases, so should be tested for earlier. The bit
-; test on 0,3,6 is unnecessary as all cases cover the range [0, 9]. The range
-; check guarantees that cases other than 1,4,7 and 2,5,8,9 must be in 0,3,6.
-
-; CHECK-LABEL: bt_order_by_weight
-; 146 = 2^1 + 2^4 + 2^7
-; CHECK: movl $146
-; CHECK: btl
-; 292 = 2^2 + 2^5 + 2^8 + 2^9
-; CHECK: movl $804
-; CHECK: btl
-; 73 = 2^0 + 2^3 + 2^6
-; CHECK-NOT: movl $73
-; CHECK-NOT: btl
 }
 
 !1 = !{!"branch_weights",
@@ -548,7 +1646,58 @@ return: ret void
        ; Cases 2,5,8,9:
        i32 0, i32 0, i32 0, i32 20}
 
+
+; Case 200 has the highest weight and should come first. 100 and 300 have the
+; same weight, but 300 goes to the 'next' block, so should be last.
 define void @order_by_weight_and_fallthrough(i32 %x) {
+; CHECK-LABEL: order_by_weight_and_fallthrough:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl $200, %edi
+; CHECK-NEXT:    jne .LBB17_1
+; CHECK-NEXT:  .LBB17_3: # %bb0
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB17_1: # %entry
+; CHECK-NEXT:    cmpl $100, %edi
+; CHECK-NEXT:    je .LBB17_4
+; CHECK-NEXT:  # %bb.2: # %entry
+; CHECK-NEXT:    cmpl $300, %edi # imm = 0x12C
+; CHECK-NEXT:    je .LBB17_3
+; CHECK-NEXT:  # %bb.5: # %return
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB17_4: # %bb1
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+;
+; NOOPT-LABEL: order_by_weight_and_fallthrough:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    pushq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; NOOPT-NEXT:    subl $100, %edi
+; NOOPT-NEXT:    je .LBB17_2
+; NOOPT-NEXT:    jmp .LBB17_4
+; NOOPT-NEXT:  .LBB17_4: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $200, %eax
+; NOOPT-NEXT:    je .LBB17_1
+; NOOPT-NEXT:    jmp .LBB17_5
+; NOOPT-NEXT:  .LBB17_5: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $300, %eax # imm = 0x12C
+; NOOPT-NEXT:    jne .LBB17_3
+; NOOPT-NEXT:    jmp .LBB17_1
+; NOOPT-NEXT:  .LBB17_1: # %bb0
+; NOOPT-NEXT:    xorl %edi, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB17_3
+; NOOPT-NEXT:  .LBB17_2: # %bb1
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:  .LBB17_3: # %return
+; NOOPT-NEXT:    popq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:    retq
 entry:
   switch i32 %x, label %return [
     i32 100, label %bb1
@@ -558,13 +1707,6 @@ entry:
 bb0: tail call void @g(i32 0) br label %return
 bb1: tail call void @g(i32 1) br label %return
 return: ret void
-
-; Case 200 has the highest weight and should come first. 100 and 300 have the
-; same weight, but 300 goes to the 'next' block, so should be last.
-; CHECK-LABEL: order_by_weight_and_fallthrough
-; CHECK: cmpl $200
-; CHECK: cmpl $100
-; CHECK: cmpl $300
 }
 
 !2 = !{!"branch_weights",
@@ -578,7 +1720,111 @@ return: ret void
        i32 10}
 
 
+; Make sure to pick a pivot in the middle also with zero-weight cases.
 define void @zero_weight_tree(i32 %x) {
+; CHECK-LABEL: zero_weight_tree:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl $29, %edi
+; CHECK-NEXT:    jg .LBB18_5
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    jne .LBB18_2
+; CHECK-NEXT:  # %bb.9: # %bb0
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB18_5: # %entry
+; CHECK-NEXT:    cmpl $50, %edi
+; CHECK-NEXT:    jne .LBB18_6
+; CHECK-NEXT:  # %bb.12: # %bb5
+; CHECK-NEXT:    movl $5, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB18_2: # %entry
+; CHECK-NEXT:    cmpl $10, %edi
+; CHECK-NEXT:    je .LBB18_10
+; CHECK-NEXT:  # %bb.3: # %entry
+; CHECK-NEXT:    cmpl $20, %edi
+; CHECK-NEXT:    jne .LBB18_13
+; CHECK-NEXT:  # %bb.4: # %bb2
+; CHECK-NEXT:    movl $2, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB18_6: # %entry
+; CHECK-NEXT:    cmpl $30, %edi
+; CHECK-NEXT:    je .LBB18_11
+; CHECK-NEXT:  # %bb.7: # %entry
+; CHECK-NEXT:    cmpl $40, %edi
+; CHECK-NEXT:    je .LBB18_8
+; CHECK-NEXT:  .LBB18_13: # %return
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB18_10: # %bb1
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB18_11: # %bb3
+; CHECK-NEXT:    movl $3, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB18_8: # %bb4
+; CHECK-NEXT:    movl $4, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+;
+; NOOPT-LABEL: zero_weight_tree:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    pushq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; NOOPT-NEXT:    testl %edi, %edi
+; NOOPT-NEXT:    je .LBB18_1
+; NOOPT-NEXT:    jmp .LBB18_8
+; NOOPT-NEXT:  .LBB18_8: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $10, %eax
+; NOOPT-NEXT:    je .LBB18_2
+; NOOPT-NEXT:    jmp .LBB18_9
+; NOOPT-NEXT:  .LBB18_9: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $20, %eax
+; NOOPT-NEXT:    je .LBB18_3
+; NOOPT-NEXT:    jmp .LBB18_10
+; NOOPT-NEXT:  .LBB18_10: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $30, %eax
+; NOOPT-NEXT:    je .LBB18_4
+; NOOPT-NEXT:    jmp .LBB18_11
+; NOOPT-NEXT:  .LBB18_11: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $40, %eax
+; NOOPT-NEXT:    je .LBB18_5
+; NOOPT-NEXT:    jmp .LBB18_12
+; NOOPT-NEXT:  .LBB18_12: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $50, %eax
+; NOOPT-NEXT:    je .LBB18_6
+; NOOPT-NEXT:    jmp .LBB18_7
+; NOOPT-NEXT:  .LBB18_1: # %bb0
+; NOOPT-NEXT:    xorl %edi, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB18_7
+; NOOPT-NEXT:  .LBB18_2: # %bb1
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB18_7
+; NOOPT-NEXT:  .LBB18_3: # %bb2
+; NOOPT-NEXT:    movl $2, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB18_7
+; NOOPT-NEXT:  .LBB18_4: # %bb3
+; NOOPT-NEXT:    movl $3, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB18_7
+; NOOPT-NEXT:  .LBB18_5: # %bb4
+; NOOPT-NEXT:    movl $4, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB18_7
+; NOOPT-NEXT:  .LBB18_6: # %bb5
+; NOOPT-NEXT:    movl $5, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:  .LBB18_7: # %return
+; NOOPT-NEXT:    popq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:    retq
 entry:
   switch i32 %x, label %return [
     i32 0,  label %bb0
@@ -595,17 +1841,149 @@ bb3: tail call void @g(i32 3) br label %return
 bb4: tail call void @g(i32 4) br label %return
 bb5: tail call void @g(i32 5) br label %return
 return: ret void
-
-; Make sure to pick a pivot in the middle also with zero-weight cases.
-; CHECK-LABEL: zero_weight_tree
-; CHECK-NOT: cmpl
-; CHECK: cmpl $29
 }
 
 !3 = !{!"branch_weights", i32 1, i32 10, i32 0, i32 0, i32 0, i32 0, i32 10}
 
 
+; Without branch probabilities, the pivot would be 40, since that would yield
+; equal-sized sub-trees. When taking weights into account, case 70 becomes the
+; pivot. Since there is room for 3 cases in a leaf, cases 50 and 60 are also
+; included in the right-hand side because that doesn't reduce their rank.
 define void @left_leaning_weight_balanced_tree(i32 %x) {
+; CHECK-LABEL: left_leaning_weight_balanced_tree:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl $49, %edi
+; CHECK-NEXT:    jle .LBB19_1
+; CHECK-NEXT:  # %bb.11: # %entry
+; CHECK-NEXT:    cmpl $70, %edi
+; CHECK-NEXT:    jne .LBB19_12
+; CHECK-NEXT:  .LBB19_14: # %bb6
+; CHECK-NEXT:    movl $6, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB19_1: # %entry
+; CHECK-NEXT:    cmpl $9, %edi
+; CHECK-NEXT:    jg .LBB19_4
+; CHECK-NEXT:  # %bb.2: # %entry
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    jne .LBB19_18
+; CHECK-NEXT:  # %bb.3: # %bb0
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB19_4: # %entry
+; CHECK-NEXT:    cmpl $29, %edi
+; CHECK-NEXT:    jg .LBB19_8
+; CHECK-NEXT:  # %bb.5: # %entry
+; CHECK-NEXT:    cmpl $10, %edi
+; CHECK-NEXT:    je .LBB19_15
+; CHECK-NEXT:  # %bb.6: # %entry
+; CHECK-NEXT:    cmpl $20, %edi
+; CHECK-NEXT:    jne .LBB19_18
+; CHECK-NEXT:  # %bb.7: # %bb2
+; CHECK-NEXT:    movl $2, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB19_12: # %entry
+; CHECK-NEXT:    cmpl $50, %edi
+; CHECK-NEXT:    je .LBB19_17
+; CHECK-NEXT:  # %bb.13: # %entry
+; CHECK-NEXT:    cmpl $60, %edi
+; CHECK-NEXT:    je .LBB19_14
+; CHECK-NEXT:    jmp .LBB19_18
+; CHECK-NEXT:  .LBB19_8: # %entry
+; CHECK-NEXT:    cmpl $30, %edi
+; CHECK-NEXT:    je .LBB19_16
+; CHECK-NEXT:  # %bb.9: # %entry
+; CHECK-NEXT:    cmpl $40, %edi
+; CHECK-NEXT:    jne .LBB19_18
+; CHECK-NEXT:  # %bb.10: # %bb4
+; CHECK-NEXT:    movl $4, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB19_15: # %bb1
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB19_16: # %bb3
+; CHECK-NEXT:    movl $3, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB19_17: # %bb5
+; CHECK-NEXT:    movl $5, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB19_18: # %return
+; CHECK-NEXT:    retq
+;
+; NOOPT-LABEL: left_leaning_weight_balanced_tree:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    pushq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; NOOPT-NEXT:    testl %edi, %edi
+; NOOPT-NEXT:    je .LBB19_1
+; NOOPT-NEXT:    jmp .LBB19_9
+; NOOPT-NEXT:  .LBB19_9: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $10, %eax
+; NOOPT-NEXT:    je .LBB19_2
+; NOOPT-NEXT:    jmp .LBB19_10
+; NOOPT-NEXT:  .LBB19_10: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $20, %eax
+; NOOPT-NEXT:    je .LBB19_3
+; NOOPT-NEXT:    jmp .LBB19_11
+; NOOPT-NEXT:  .LBB19_11: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $30, %eax
+; NOOPT-NEXT:    je .LBB19_4
+; NOOPT-NEXT:    jmp .LBB19_12
+; NOOPT-NEXT:  .LBB19_12: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $40, %eax
+; NOOPT-NEXT:    je .LBB19_5
+; NOOPT-NEXT:    jmp .LBB19_13
+; NOOPT-NEXT:  .LBB19_13: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $50, %eax
+; NOOPT-NEXT:    je .LBB19_6
+; NOOPT-NEXT:    jmp .LBB19_14
+; NOOPT-NEXT:  .LBB19_14: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $60, %eax
+; NOOPT-NEXT:    je .LBB19_7
+; NOOPT-NEXT:    jmp .LBB19_15
+; NOOPT-NEXT:  .LBB19_15: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $70, %eax
+; NOOPT-NEXT:    je .LBB19_7
+; NOOPT-NEXT:    jmp .LBB19_8
+; NOOPT-NEXT:  .LBB19_1: # %bb0
+; NOOPT-NEXT:    xorl %edi, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB19_8
+; NOOPT-NEXT:  .LBB19_2: # %bb1
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB19_8
+; NOOPT-NEXT:  .LBB19_3: # %bb2
+; NOOPT-NEXT:    movl $2, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB19_8
+; NOOPT-NEXT:  .LBB19_4: # %bb3
+; NOOPT-NEXT:    movl $3, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB19_8
+; NOOPT-NEXT:  .LBB19_5: # %bb4
+; NOOPT-NEXT:    movl $4, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB19_8
+; NOOPT-NEXT:  .LBB19_6: # %bb5
+; NOOPT-NEXT:    movl $5, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB19_8
+; NOOPT-NEXT:  .LBB19_7: # %bb6
+; NOOPT-NEXT:    movl $6, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:  .LBB19_8: # %return
+; NOOPT-NEXT:    popq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:    retq
 entry:
   switch i32 %x, label %return [
     i32 0,  label %bb0
@@ -626,21 +2004,145 @@ bb5: tail call void @g(i32 5) br label %return
 bb6: tail call void @g(i32 6) br label %return
 bb7: tail call void @g(i32 7) br label %return
 return: ret void
-
-; Without branch probabilities, the pivot would be 40, since that would yield
-; equal-sized sub-trees. When taking weights into account, case 70 becomes the
-; pivot. Since there is room for 3 cases in a leaf, cases 50 and 60 are also
-; included in the right-hand side because that doesn't reduce their rank.
-
-; CHECK-LABEL: left_leaning_weight_balanced_tree
-; CHECK-NOT: cmpl
-; CHECK: cmpl $49
 }
 
 !4 = !{!"branch_weights", i32 1, i32 10, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1000}
 
 
+; Same as the previous test, except case 50 has higher rank to the left than it
+; would have on the right. Case 60 would have the same rank on both sides, so is
+; moved into the leaf.
 define void @left_leaning_weight_balanced_tree2(i32 %x) {
+; CHECK-LABEL: left_leaning_weight_balanced_tree2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl $59, %edi
+; CHECK-NEXT:    jle .LBB20_1
+; CHECK-NEXT:  # %bb.10: # %entry
+; CHECK-NEXT:    cmpl $70, %edi
+; CHECK-NEXT:    jne .LBB20_11
+; CHECK-NEXT:  .LBB20_12: # %bb6
+; CHECK-NEXT:    movl $6, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB20_1: # %entry
+; CHECK-NEXT:    cmpl $29, %edi
+; CHECK-NEXT:    jle .LBB20_2
+; CHECK-NEXT:  # %bb.6: # %entry
+; CHECK-NEXT:    cmpl $50, %edi
+; CHECK-NEXT:    jne .LBB20_7
+; CHECK-NEXT:  # %bb.16: # %bb5
+; CHECK-NEXT:    movl $5, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB20_11: # %entry
+; CHECK-NEXT:    cmpl $60, %edi
+; CHECK-NEXT:    je .LBB20_12
+; CHECK-NEXT:    jmp .LBB20_17
+; CHECK-NEXT:  .LBB20_2: # %entry
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    jne .LBB20_3
+; CHECK-NEXT:  # %bb.13: # %bb0
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB20_3: # %entry
+; CHECK-NEXT:    cmpl $10, %edi
+; CHECK-NEXT:    je .LBB20_14
+; CHECK-NEXT:  # %bb.4: # %entry
+; CHECK-NEXT:    cmpl $20, %edi
+; CHECK-NEXT:    jne .LBB20_17
+; CHECK-NEXT:  # %bb.5: # %bb2
+; CHECK-NEXT:    movl $2, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB20_7: # %entry
+; CHECK-NEXT:    cmpl $30, %edi
+; CHECK-NEXT:    je .LBB20_15
+; CHECK-NEXT:  # %bb.8: # %entry
+; CHECK-NEXT:    cmpl $40, %edi
+; CHECK-NEXT:    jne .LBB20_17
+; CHECK-NEXT:  # %bb.9: # %bb4
+; CHECK-NEXT:    movl $4, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB20_14: # %bb1
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB20_15: # %bb3
+; CHECK-NEXT:    movl $3, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB20_17: # %return
+; CHECK-NEXT:    retq
+;
+; NOOPT-LABEL: left_leaning_weight_balanced_tree2:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    pushq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; NOOPT-NEXT:    testl %edi, %edi
+; NOOPT-NEXT:    je .LBB20_1
+; NOOPT-NEXT:    jmp .LBB20_9
+; NOOPT-NEXT:  .LBB20_9: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $10, %eax
+; NOOPT-NEXT:    je .LBB20_2
+; NOOPT-NEXT:    jmp .LBB20_10
+; NOOPT-NEXT:  .LBB20_10: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $20, %eax
+; NOOPT-NEXT:    je .LBB20_3
+; NOOPT-NEXT:    jmp .LBB20_11
+; NOOPT-NEXT:  .LBB20_11: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $30, %eax
+; NOOPT-NEXT:    je .LBB20_4
+; NOOPT-NEXT:    jmp .LBB20_12
+; NOOPT-NEXT:  .LBB20_12: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $40, %eax
+; NOOPT-NEXT:    je .LBB20_5
+; NOOPT-NEXT:    jmp .LBB20_13
+; NOOPT-NEXT:  .LBB20_13: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $50, %eax
+; NOOPT-NEXT:    je .LBB20_6
+; NOOPT-NEXT:    jmp .LBB20_14
+; NOOPT-NEXT:  .LBB20_14: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $60, %eax
+; NOOPT-NEXT:    je .LBB20_7
+; NOOPT-NEXT:    jmp .LBB20_15
+; NOOPT-NEXT:  .LBB20_15: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $70, %eax
+; NOOPT-NEXT:    je .LBB20_7
+; NOOPT-NEXT:    jmp .LBB20_8
+; NOOPT-NEXT:  .LBB20_1: # %bb0
+; NOOPT-NEXT:    xorl %edi, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB20_8
+; NOOPT-NEXT:  .LBB20_2: # %bb1
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB20_8
+; NOOPT-NEXT:  .LBB20_3: # %bb2
+; NOOPT-NEXT:    movl $2, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB20_8
+; NOOPT-NEXT:  .LBB20_4: # %bb3
+; NOOPT-NEXT:    movl $3, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB20_8
+; NOOPT-NEXT:  .LBB20_5: # %bb4
+; NOOPT-NEXT:    movl $4, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB20_8
+; NOOPT-NEXT:  .LBB20_6: # %bb5
+; NOOPT-NEXT:    movl $5, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB20_8
+; NOOPT-NEXT:  .LBB20_7: # %bb6
+; NOOPT-NEXT:    movl $6, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:  .LBB20_8: # %return
+; NOOPT-NEXT:    popq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:    retq
 entry:
   switch i32 %x, label %return [
     i32 0,  label %bb0
@@ -661,20 +2163,143 @@ bb5: tail call void @g(i32 5) br label %return
 bb6: tail call void @g(i32 6) br label %return
 bb7: tail call void @g(i32 7) br label %return
 return: ret void
-
-; Same as the previous test, except case 50 has higher rank to the left than it
-; would have on the right. Case 60 would have the same rank on both sides, so is
-; moved into the leaf.
-
-; CHECK-LABEL: left_leaning_weight_balanced_tree2
-; CHECK-NOT: cmpl
-; CHECK: cmpl $59
 }
 
 !5 = !{!"branch_weights", i32 1, i32 10, i32 1, i32 1, i32 1, i32 1, i32 90, i32 70, i32 1000}
 
 
+; Analogous to left_leaning_weight_balanced_tree.
 define void @right_leaning_weight_balanced_tree(i32 %x) {
+; CHECK-LABEL: right_leaning_weight_balanced_tree:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl $19, %edi
+; CHECK-NEXT:    jg .LBB21_4
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    jne .LBB21_2
+; CHECK-NEXT:  # %bb.13: # %bb0
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB21_4: # %entry
+; CHECK-NEXT:    cmpl $49, %edi
+; CHECK-NEXT:    jle .LBB21_5
+; CHECK-NEXT:  # %bb.9: # %entry
+; CHECK-NEXT:    cmpl $70, %edi
+; CHECK-NEXT:    jne .LBB21_10
+; CHECK-NEXT:  .LBB21_12: # %bb6
+; CHECK-NEXT:    movl $6, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB21_5: # %entry
+; CHECK-NEXT:    cmpl $20, %edi
+; CHECK-NEXT:    je .LBB21_14
+; CHECK-NEXT:  # %bb.6: # %entry
+; CHECK-NEXT:    cmpl $30, %edi
+; CHECK-NEXT:    je .LBB21_15
+; CHECK-NEXT:  # %bb.7: # %entry
+; CHECK-NEXT:    cmpl $40, %edi
+; CHECK-NEXT:    jne .LBB21_17
+; CHECK-NEXT:  # %bb.8: # %bb4
+; CHECK-NEXT:    movl $4, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB21_10: # %entry
+; CHECK-NEXT:    cmpl $50, %edi
+; CHECK-NEXT:    je .LBB21_16
+; CHECK-NEXT:  # %bb.11: # %entry
+; CHECK-NEXT:    cmpl $60, %edi
+; CHECK-NEXT:    je .LBB21_12
+; CHECK-NEXT:    jmp .LBB21_17
+; CHECK-NEXT:  .LBB21_14: # %bb2
+; CHECK-NEXT:    movl $2, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB21_15: # %bb3
+; CHECK-NEXT:    movl $3, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB21_2: # %entry
+; CHECK-NEXT:    cmpl $10, %edi
+; CHECK-NEXT:    jne .LBB21_17
+; CHECK-NEXT:  # %bb.3: # %bb1
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB21_17: # %return
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB21_16: # %bb5
+; CHECK-NEXT:    movl $5, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+;
+; NOOPT-LABEL: right_leaning_weight_balanced_tree:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    pushq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; NOOPT-NEXT:    testl %edi, %edi
+; NOOPT-NEXT:    je .LBB21_1
+; NOOPT-NEXT:    jmp .LBB21_9
+; NOOPT-NEXT:  .LBB21_9: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $10, %eax
+; NOOPT-NEXT:    je .LBB21_2
+; NOOPT-NEXT:    jmp .LBB21_10
+; NOOPT-NEXT:  .LBB21_10: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $20, %eax
+; NOOPT-NEXT:    je .LBB21_3
+; NOOPT-NEXT:    jmp .LBB21_11
+; NOOPT-NEXT:  .LBB21_11: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $30, %eax
+; NOOPT-NEXT:    je .LBB21_4
+; NOOPT-NEXT:    jmp .LBB21_12
+; NOOPT-NEXT:  .LBB21_12: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $40, %eax
+; NOOPT-NEXT:    je .LBB21_5
+; NOOPT-NEXT:    jmp .LBB21_13
+; NOOPT-NEXT:  .LBB21_13: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $50, %eax
+; NOOPT-NEXT:    je .LBB21_6
+; NOOPT-NEXT:    jmp .LBB21_14
+; NOOPT-NEXT:  .LBB21_14: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $60, %eax
+; NOOPT-NEXT:    je .LBB21_7
+; NOOPT-NEXT:    jmp .LBB21_15
+; NOOPT-NEXT:  .LBB21_15: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $70, %eax
+; NOOPT-NEXT:    je .LBB21_7
+; NOOPT-NEXT:    jmp .LBB21_8
+; NOOPT-NEXT:  .LBB21_1: # %bb0
+; NOOPT-NEXT:    xorl %edi, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB21_8
+; NOOPT-NEXT:  .LBB21_2: # %bb1
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB21_8
+; NOOPT-NEXT:  .LBB21_3: # %bb2
+; NOOPT-NEXT:    movl $2, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB21_8
+; NOOPT-NEXT:  .LBB21_4: # %bb3
+; NOOPT-NEXT:    movl $3, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB21_8
+; NOOPT-NEXT:  .LBB21_5: # %bb4
+; NOOPT-NEXT:    movl $4, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB21_8
+; NOOPT-NEXT:  .LBB21_6: # %bb5
+; NOOPT-NEXT:    movl $5, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB21_8
+; NOOPT-NEXT:  .LBB21_7: # %bb6
+; NOOPT-NEXT:    movl $6, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:  .LBB21_8: # %return
+; NOOPT-NEXT:    popq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:    retq
 entry:
   switch i32 %x, label %return [
     i32 0,  label %bb0
@@ -695,18 +2320,106 @@ bb5: tail call void @g(i32 5) br label %return
 bb6: tail call void @g(i32 6) br label %return
 bb7: tail call void @g(i32 7) br label %return
 return: ret void
-
-; Analogous to left_leaning_weight_balanced_tree.
-
-; CHECK-LABEL: right_leaning_weight_balanced_tree
-; CHECK-NOT: cmpl
-; CHECK: cmpl $19
 }
 
 !6 = !{!"branch_weights", i32 1, i32 1000, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 10}
 
 
+; If the tree were balanced based on number of clusters, {0-3,100} would go on
+; the left and {200,300} on the right. However, the jump table weighs as much
+; as its components, so 100 is selected as the pivot.
 define void @jump_table_affects_balance(i32 %x) {
+; CHECK-LABEL: jump_table_affects_balance:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl $99, %edi
+; CHECK-NEXT:    jg .LBB22_3
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    cmpl $3, %edi
+; CHECK-NEXT:    ja .LBB22_10
+; CHECK-NEXT:  # %bb.2: # %entry
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    jmpq *.LJTI22_0(,%rax,8)
+; CHECK-NEXT:  .LBB22_9: # %bb3
+; CHECK-NEXT:    movl $3, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB22_3: # %entry
+; CHECK-NEXT:    cmpl $300, %edi # imm = 0x12C
+; CHECK-NEXT:    je .LBB22_8
+; CHECK-NEXT:  # %bb.4: # %entry
+; CHECK-NEXT:    cmpl $200, %edi
+; CHECK-NEXT:    je .LBB22_7
+; CHECK-NEXT:  # %bb.5: # %entry
+; CHECK-NEXT:    cmpl $100, %edi
+; CHECK-NEXT:    jne .LBB22_10
+; CHECK-NEXT:  .LBB22_6: # %bb0
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB22_8: # %bb2
+; CHECK-NEXT:    movl $2, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB22_7: # %bb1
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB22_10: # %return
+; CHECK-NEXT:    retq
+;
+; NOOPT-LABEL: jump_table_affects_balance:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    pushq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; NOOPT-NEXT:    testl %edi, %edi
+; NOOPT-NEXT:    je .LBB22_1
+; NOOPT-NEXT:    jmp .LBB22_6
+; NOOPT-NEXT:  .LBB22_6: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $1, %eax
+; NOOPT-NEXT:    je .LBB22_2
+; NOOPT-NEXT:    jmp .LBB22_7
+; NOOPT-NEXT:  .LBB22_7: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $2, %eax
+; NOOPT-NEXT:    je .LBB22_3
+; NOOPT-NEXT:    jmp .LBB22_8
+; NOOPT-NEXT:  .LBB22_8: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $3, %eax
+; NOOPT-NEXT:    je .LBB22_4
+; NOOPT-NEXT:    jmp .LBB22_9
+; NOOPT-NEXT:  .LBB22_9: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $100, %eax
+; NOOPT-NEXT:    je .LBB22_1
+; NOOPT-NEXT:    jmp .LBB22_10
+; NOOPT-NEXT:  .LBB22_10: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $200, %eax
+; NOOPT-NEXT:    je .LBB22_2
+; NOOPT-NEXT:    jmp .LBB22_11
+; NOOPT-NEXT:  .LBB22_11: # %entry
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $300, %eax # imm = 0x12C
+; NOOPT-NEXT:    je .LBB22_3
+; NOOPT-NEXT:    jmp .LBB22_5
+; NOOPT-NEXT:  .LBB22_1: # %bb0
+; NOOPT-NEXT:    xorl %edi, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB22_5
+; NOOPT-NEXT:  .LBB22_2: # %bb1
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB22_5
+; NOOPT-NEXT:  .LBB22_3: # %bb2
+; NOOPT-NEXT:    movl $2, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB22_5
+; NOOPT-NEXT:  .LBB22_4: # %bb3
+; NOOPT-NEXT:    movl $3, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:  .LBB22_5: # %return
+; NOOPT-NEXT:    popq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:    retq
 entry:
   switch i32 %x, label %return [
     ; Jump table:
@@ -724,17 +2437,58 @@ bb1: tail call void @g(i32 1) br label %return
 bb2: tail call void @g(i32 2) br label %return
 bb3: tail call void @g(i32 3) br label %return
 return: ret void
-
-; CHECK-LABEL: jump_table_affects_balance
-; If the tree were balanced based on number of clusters, {0-3,100} would go on
-; the left and {200,300} on the right. However, the jump table weights as much
-; as its components, so 100 is selected as the pivot.
-; CHECK-NOT: cmpl
-; CHECK: cmpl $99
 }
 
 
+; Don't assert due to truncating the bitwidth (64) to i4 when checking
+; that the bit-test range fits in a word.
 define void @pr23738(i4 %x) {
+; CHECK-LABEL: pr23738:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    andb $15, %al
+; CHECK-NEXT:    cmpb $11, %al
+; CHECK-NEXT:    ja .LBB23_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    andl $15, %edi
+; CHECK-NEXT:    movl $2051, %eax # imm = 0x803
+; CHECK-NEXT:    btq %rdi, %rax
+; CHECK-NEXT:    jae .LBB23_2
+; CHECK-NEXT:  # %bb.3: # %bb1
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB23_2: # %bb0
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+;
+; NOOPT-LABEL: pr23738:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    pushq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    movb %dil, %al
+; NOOPT-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; NOOPT-NEXT:    andb $15, %al
+; NOOPT-NEXT:    subb $11, %al
+; NOOPT-NEXT:    je .LBB23_2
+; NOOPT-NEXT:    jmp .LBB23_4
+; NOOPT-NEXT:  .LBB23_4: # %entry
+; NOOPT-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; NOOPT-NEXT:    andb $15, %al
+; NOOPT-NEXT:    subb $2, %al
+; NOOPT-NEXT:    jb .LBB23_2
+; NOOPT-NEXT:    jmp .LBB23_1
+; NOOPT-NEXT:  .LBB23_1: # %bb0
+; NOOPT-NEXT:    xorl %edi, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB23_3
+; NOOPT-NEXT:  .LBB23_2: # %bb1
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:  .LBB23_3: # %return
+; NOOPT-NEXT:    popq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:    retq
 entry:
   switch i4 %x, label %bb0 [
     i4 0, label %bb1
@@ -744,12 +2498,73 @@ entry:
 bb0: tail call void @g(i32 0) br label %return
 bb1: tail call void @g(i32 1) br label %return
 return: ret void
-; Don't assert due to truncating the bitwidth (64) to i4 when checking
-; that the bit-test range fits in a word.
 }
 
 
+; The switch is lowered with bit tests. Since the case range is contiguous, the
+; second bit test is redundant and can be skipped. Check that we don't update
+; the phi node with an incoming value from the MBB of the skipped bit test
+; (-verify-machine-instrs catches this).
 define i32 @pr27135(i32 %i) {
+; CHECK-LABEL: pr27135:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    jne .LBB24_5
+; CHECK-NEXT:  # %bb.1: # %sw
+; CHECK-NEXT:    movl $1, %eax
+; CHECK-NEXT:    addl $-96, %edi
+; CHECK-NEXT:    cmpl $5, %edi
+; CHECK-NEXT:    jbe .LBB24_2
+; CHECK-NEXT:  .LBB24_5: # %end
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB24_2: # %sw
+; CHECK-NEXT:    movl $19, %eax
+; CHECK-NEXT:    btl %edi, %eax
+; CHECK-NEXT:    jae .LBB24_3
+; CHECK-NEXT:  # %bb.4: # %sw.bb2
+; CHECK-NEXT:  .LBB24_3: # %sw.bb
+;
+; NOOPT-LABEL: pr27135:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; NOOPT-NEXT:    xorl %eax, %eax
+; NOOPT-NEXT:    # implicit-def: $cl
+; NOOPT-NEXT:    testb $1, %cl
+; NOOPT-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; NOOPT-NEXT:    jne .LBB24_1
+; NOOPT-NEXT:    jmp .LBB24_4
+; NOOPT-NEXT:  .LBB24_1: # %sw
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    movl $1, %ecx
+; NOOPT-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; NOOPT-NEXT:    addl $-96, %eax
+; NOOPT-NEXT:    subl $2, %eax
+; NOOPT-NEXT:    jb .LBB24_3
+; NOOPT-NEXT:    jmp .LBB24_5
+; NOOPT-NEXT:  .LBB24_5: # %sw
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    addl $-98, %eax
+; NOOPT-NEXT:    subl $2, %eax
+; NOOPT-NEXT:    jb .LBB24_2
+; NOOPT-NEXT:    jmp .LBB24_6
+; NOOPT-NEXT:  .LBB24_6: # %sw
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    subl $100, %eax
+; NOOPT-NEXT:    je .LBB24_3
+; NOOPT-NEXT:    jmp .LBB24_7
+; NOOPT-NEXT:  .LBB24_7: # %sw
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
+; NOOPT-NEXT:    subl $101, %ecx
+; NOOPT-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; NOOPT-NEXT:    jne .LBB24_4
+; NOOPT-NEXT:    jmp .LBB24_2
+; NOOPT-NEXT:  .LBB24_2: # %sw.bb
+; NOOPT-NEXT:  .LBB24_3: # %sw.bb2
+; NOOPT-NEXT:  .LBB24_4: # %end
+; NOOPT-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; NOOPT-NEXT:    retq
 entry:
   br i1 undef, label %sw, label %end
 sw:
@@ -768,18 +2583,50 @@ sw.bb2:
 end:
   %p = phi i32 [ 1, %sw ], [ 0, %entry ]
   ret i32 %p
-
-; CHECK-LABEL: pr27135:
-; The switch is lowered with bit tests. Since the case range is contiguous, the
-; second bit test is redundant and can be skipped. Check that we don't update
-; the phi node with an incoming value from the MBB of the skipped bit test
-; (-verify-machine-instrs cathces this).
-; CHECK: btl
-; CHECK-NOT: btl
 }
 
 
+; Since the default is unreachable, either cluster will be reached.
+; Only one comparison should be emitted.
 define void @range_with_unreachable_fallthrough(i32 %i) {
+; CHECK-LABEL: range_with_unreachable_fallthrough:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addl $-4, %edi
+; CHECK-NEXT:    cmpl $3, %edi
+; CHECK-NEXT:    jae .LBB25_1
+; CHECK-NEXT:  # %bb.2: # %bb2
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB25_1: # %bb1
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+;
+; NOOPT-LABEL: range_with_unreachable_fallthrough:
+; NOOPT:       # %bb.0: # %entry
+; NOOPT-NEXT:    pushq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    movl %edi, %eax
+; NOOPT-NEXT:    decl %eax
+; NOOPT-NEXT:    subl $3, %eax
+; NOOPT-NEXT:    jb .LBB25_1
+; NOOPT-NEXT:    jmp .LBB25_5
+; NOOPT-NEXT:  .LBB25_5: # %entry
+; NOOPT-NEXT:    jmp .LBB25_2
+; NOOPT-NEXT:  .LBB25_1: # %bb1
+; NOOPT-NEXT:    xorl %edi, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB25_4
+; NOOPT-NEXT:  .LBB25_2: # %bb2
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB25_4
+; NOOPT-NEXT:  # %bb.3: # %default
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:  .LBB25_4: # %return
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    popq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:    retq
 entry:
   switch i32 %i, label %default [
     i32 1, label %bb1
@@ -795,12 +2642,127 @@ default: unreachable
 
 return:
   ret void
+}
 
-; CHECK-LABEL: range_with_unreachable_fallthrough:
-; Since the default is unreachable, either cluster will be reached.
-; Only one comparison should be emitted.
-; CHECK: cmpl
-; CHECK-NOT: cmpl
-; CHECK: jmp g
-; CHECK: jmp g
+
+; A switch over a small type (i8) should be extended to avoid truncation
+; instructions.
+define void @switch_i8(i32 %a) {
+; CHECK-LABEL: switch_i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    andb $127, %dil
+; CHECK-NEXT:    leal -1(%rdi), %eax
+; CHECK-NEXT:    cmpb $8, %al
+; CHECK-NEXT:    ja .LBB26_1
+; CHECK-NEXT:  # %bb.10:
+; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    jmpq *.LJTI26_0(,%rax,8)
+; CHECK-NEXT:  .LBB26_4: # %bb0
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB26_1:
+; CHECK-NEXT:    cmpb $13, %dil
+; CHECK-NEXT:    je .LBB26_8
+; CHECK-NEXT:  # %bb.2:
+; CHECK-NEXT:    cmpb $42, %dil
+; CHECK-NEXT:    jne .LBB26_9
+; CHECK-NEXT:  # %bb.3: # %bb5
+; CHECK-NEXT:    movl $5, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB26_9: # %return
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB26_5: # %bb1
+; CHECK-NEXT:    movl $1, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB26_6: # %bb2
+; CHECK-NEXT:    movl $2, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB26_7: # %bb3
+; CHECK-NEXT:    movl $3, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+; CHECK-NEXT:  .LBB26_8: # %bb4
+; CHECK-NEXT:    movl $4, %edi
+; CHECK-NEXT:    jmp g at PLT # TAILCALL
+;
+; NOOPT-LABEL: switch_i8:
+; NOOPT:       # %bb.0:
+; NOOPT-NEXT:    pushq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 16
+; NOOPT-NEXT:    movb %dil, %al
+; NOOPT-NEXT:    andb $127, %al
+; NOOPT-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
+; NOOPT-NEXT:    subb $1, %al
+; NOOPT-NEXT:    je .LBB26_1
+; NOOPT-NEXT:    jmp .LBB26_8
+; NOOPT-NEXT:  .LBB26_8:
+; NOOPT-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; NOOPT-NEXT:    subb $3, %al
+; NOOPT-NEXT:    je .LBB26_2
+; NOOPT-NEXT:    jmp .LBB26_9
+; NOOPT-NEXT:  .LBB26_9:
+; NOOPT-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; NOOPT-NEXT:    subb $7, %al
+; NOOPT-NEXT:    je .LBB26_3
+; NOOPT-NEXT:    jmp .LBB26_10
+; NOOPT-NEXT:  .LBB26_10:
+; NOOPT-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; NOOPT-NEXT:    subb $9, %al
+; NOOPT-NEXT:    je .LBB26_4
+; NOOPT-NEXT:    jmp .LBB26_11
+; NOOPT-NEXT:  .LBB26_11:
+; NOOPT-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; NOOPT-NEXT:    subb $13, %al
+; NOOPT-NEXT:    je .LBB26_5
+; NOOPT-NEXT:    jmp .LBB26_12
+; NOOPT-NEXT:  .LBB26_12:
+; NOOPT-NEXT:    movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload
+; NOOPT-NEXT:    subb $42, %al
+; NOOPT-NEXT:    je .LBB26_6
+; NOOPT-NEXT:    jmp .LBB26_7
+; NOOPT-NEXT:  .LBB26_1: # %bb0
+; NOOPT-NEXT:    xorl %edi, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB26_7
+; NOOPT-NEXT:  .LBB26_2: # %bb1
+; NOOPT-NEXT:    movl $1, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB26_7
+; NOOPT-NEXT:  .LBB26_3: # %bb2
+; NOOPT-NEXT:    movl $2, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB26_7
+; NOOPT-NEXT:  .LBB26_4: # %bb3
+; NOOPT-NEXT:    movl $3, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB26_7
+; NOOPT-NEXT:  .LBB26_5: # %bb4
+; NOOPT-NEXT:    movl $4, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:    jmp .LBB26_7
+; NOOPT-NEXT:  .LBB26_6: # %bb5
+; NOOPT-NEXT:    movl $5, %edi
+; NOOPT-NEXT:    callq g at PLT
+; NOOPT-NEXT:  .LBB26_7: # %return
+; NOOPT-NEXT:    popq %rax
+; NOOPT-NEXT:    .cfi_def_cfa_offset 8
+; NOOPT-NEXT:    retq
+  %and = and i32 %a, 127
+  %trunc = trunc i32 %and to i8
+  switch i8 %trunc, label %return [
+    i8 1, label %bb0
+    i8 3, label %bb1
+    i8 7, label %bb2
+    i8 9, label %bb3
+    i8 13, label %bb4
+    i8 42, label %bb5
+  ]
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+bb2: tail call void @g(i32 2) br label %return
+bb3: tail call void @g(i32 3) br label %return
+bb4: tail call void @g(i32 4) br label %return
+bb5: tail call void @g(i32 5) br label %return
+return:
+  ret void
 }


        

