[llvm] c1013a6 - [X86][AArch64] Add additional extract_lowbits test

Danila Malyutin via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 19 23:18:38 PDT 2023


Author: Danila Malyutin
Date: 2023-07-20T09:18:19+03:00
New Revision: c1013a6eee0ca9ebbfda03fcfdb3afb42ebf31ff

URL: https://github.com/llvm/llvm-project/commit/c1013a6eee0ca9ebbfda03fcfdb3afb42ebf31ff
DIFF: https://github.com/llvm/llvm-project/commit/c1013a6eee0ca9ebbfda03fcfdb3afb42ebf31ff.diff

LOG: [X86][AArch64] Add additional extract_lowbits test

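Check that the vreg_width-1 mask on the bit count is removed only when the pattern is lowered to a plain shift, and is kept when it is lowered to BEXTR/BZHI.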

Differential Revision: https://reviews.llvm.org/D155734

Added: 
    

Modified: 
    llvm/test/CodeGen/AArch64/extract-lowbits.ll
    llvm/test/CodeGen/X86/extract-lowbits.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AArch64/extract-lowbits.ll b/llvm/test/CodeGen/AArch64/extract-lowbits.ll
index ecd8e1734245d6..741a1fb05eda2c 100644
--- a/llvm/test/CodeGen/AArch64/extract-lowbits.ll
+++ b/llvm/test/CodeGen/AArch64/extract-lowbits.ll
@@ -21,7 +21,7 @@
 define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi32_a0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    lsl w8, w8, w1
 ; CHECK-NEXT:    sub w8, w8, #1
 ; CHECK-NEXT:    and w0, w8, w0
@@ -35,7 +35,7 @@ define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
 define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi32_a1_indexzext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    lsl w8, w8, w1
 ; CHECK-NEXT:    sub w8, w8, #1
 ; CHECK-NEXT:    and w0, w8, w0
@@ -50,7 +50,7 @@ define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
 define i32 @bzhi32_a2_load(ptr %w, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi32_a2_load:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    ldr w9, [x0]
 ; CHECK-NEXT:    lsl w8, w8, w1
 ; CHECK-NEXT:    sub w8, w8, #1
@@ -66,7 +66,7 @@ define i32 @bzhi32_a2_load(ptr %w, i32 %numlowbits) nounwind {
 define i32 @bzhi32_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi32_a3_load_indexzext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    ldr w9, [x0]
 ; CHECK-NEXT:    lsl w8, w8, w1
 ; CHECK-NEXT:    sub w8, w8, #1
@@ -83,7 +83,7 @@ define i32 @bzhi32_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi32_a4_commutative:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    lsl w8, w8, w1
 ; CHECK-NEXT:    sub w8, w8, #1
 ; CHECK-NEXT:    and w0, w0, w8
@@ -99,7 +99,7 @@ define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind {
 define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_a0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    lsl x8, x8, x1
 ; CHECK-NEXT:    sub x8, x8, #1
 ; CHECK-NEXT:    and x0, x8, x0
@@ -110,10 +110,26 @@ define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind {
   ret i64 %masked
 }
 
+; Check that we don't throw away the vreg_width-1 mask if not using shifts
+define i64 @bzhi64_a0_masked(i64 %val, i64 %numlowbits) nounwind {
+; CHECK-LABEL: bzhi64_a0_masked:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    lsl x8, x8, x1
+; CHECK-NEXT:    sub x8, x8, #1
+; CHECK-NEXT:    and x0, x8, x0
+; CHECK-NEXT:    ret
+  %numlowbits.masked = and i64 %numlowbits, 63
+  %onebit = shl i64 1, %numlowbits.masked
+  %mask = add nsw i64 %onebit, -1
+  %masked = and i64 %mask, %val
+  ret i64 %masked
+}
+
 define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_a1_indexzext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl x8, x8, x1
 ; CHECK-NEXT:    sub x8, x8, #1
@@ -129,7 +145,7 @@ define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 define i64 @bzhi64_a2_load(ptr %w, i64 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_a2_load:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    ldr x9, [x0]
 ; CHECK-NEXT:    lsl x8, x8, x1
 ; CHECK-NEXT:    sub x8, x8, #1
@@ -145,7 +161,7 @@ define i64 @bzhi64_a2_load(ptr %w, i64 %numlowbits) nounwind {
 define i64 @bzhi64_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_a3_load_indexzext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    ldr x9, [x0]
 ; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl x8, x8, x1
@@ -163,7 +179,7 @@ define i64 @bzhi64_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_a4_commutative:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    lsl x8, x8, x1
 ; CHECK-NEXT:    sub x8, x8, #1
 ; CHECK-NEXT:    and x0, x0, x8
@@ -181,7 +197,7 @@ define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind {
 define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi32_b0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #-1
+; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    lsl w8, w8, w1
 ; CHECK-NEXT:    bic w0, w0, w8
 ; CHECK-NEXT:    ret
@@ -194,7 +210,7 @@ define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
 define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi32_b1_indexzext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #-1
+; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    lsl w8, w8, w1
 ; CHECK-NEXT:    bic w0, w0, w8
 ; CHECK-NEXT:    ret
@@ -209,7 +225,7 @@ define i32 @bzhi32_b2_load(ptr %w, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi32_b2_load:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    mov w9, #-1
+; CHECK-NEXT:    mov w9, #-1 // =0xffffffff
 ; CHECK-NEXT:    lsl w9, w9, w1
 ; CHECK-NEXT:    bic w0, w8, w9
 ; CHECK-NEXT:    ret
@@ -224,7 +240,7 @@ define i32 @bzhi32_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi32_b3_load_indexzext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr w8, [x0]
-; CHECK-NEXT:    mov w9, #-1
+; CHECK-NEXT:    mov w9, #-1 // =0xffffffff
 ; CHECK-NEXT:    lsl w9, w9, w1
 ; CHECK-NEXT:    bic w0, w8, w9
 ; CHECK-NEXT:    ret
@@ -239,7 +255,7 @@ define i32 @bzhi32_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi32_b4_commutative:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #-1
+; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT:    lsl w8, w8, w1
 ; CHECK-NEXT:    bic w0, w0, w8
 ; CHECK-NEXT:    ret
@@ -254,7 +270,7 @@ define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
 define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_b0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #-1
+; CHECK-NEXT:    mov x8, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    lsl x8, x8, x1
 ; CHECK-NEXT:    bic x0, x0, x8
 ; CHECK-NEXT:    ret
@@ -267,7 +283,7 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
 define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_b1_indexzext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #-1
+; CHECK-NEXT:    mov x8, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl x8, x8, x1
 ; CHECK-NEXT:    bic x0, x0, x8
@@ -283,7 +299,7 @@ define i64 @bzhi64_b2_load(ptr %w, i64 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_b2_load:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr x8, [x0]
-; CHECK-NEXT:    mov x9, #-1
+; CHECK-NEXT:    mov x9, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    lsl x9, x9, x1
 ; CHECK-NEXT:    bic x0, x8, x9
 ; CHECK-NEXT:    ret
@@ -298,7 +314,7 @@ define i64 @bzhi64_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_b3_load_indexzext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr x8, [x0]
-; CHECK-NEXT:    mov x9, #-1
+; CHECK-NEXT:    mov x9, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl x9, x9, x1
 ; CHECK-NEXT:    bic x0, x8, x9
@@ -314,7 +330,7 @@ define i64 @bzhi64_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_b4_commutative:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #-1
+; CHECK-NEXT:    mov x8, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    lsl x8, x8, x1
 ; CHECK-NEXT:    bic x0, x0, x8
 ; CHECK-NEXT:    ret
@@ -332,7 +348,7 @@ define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi32_c0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    neg w8, w1
-; CHECK-NEXT:    mov w9, #-1
+; CHECK-NEXT:    mov w9, #-1 // =0xffffffff
 ; CHECK-NEXT:    lsr w8, w9, w8
 ; CHECK-NEXT:    and w0, w8, w0
 ; CHECK-NEXT:    ret
@@ -345,8 +361,8 @@ define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
 define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi32_c1_indexzext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32
-; CHECK-NEXT:    mov w9, #-1
+; CHECK-NEXT:    mov w8, #32 // =0x20
+; CHECK-NEXT:    mov w9, #-1 // =0xffffffff
 ; CHECK-NEXT:    sub w8, w8, w1
 ; CHECK-NEXT:    lsr w8, w9, w8
 ; CHECK-NEXT:    and w0, w8, w0
@@ -363,7 +379,7 @@ define i32 @bzhi32_c2_load(ptr %w, i32 %numlowbits) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    neg w8, w1
 ; CHECK-NEXT:    ldr w9, [x0]
-; CHECK-NEXT:    mov w10, #-1
+; CHECK-NEXT:    mov w10, #-1 // =0xffffffff
 ; CHECK-NEXT:    lsr w8, w10, w8
 ; CHECK-NEXT:    and w0, w8, w9
 ; CHECK-NEXT:    ret
@@ -377,10 +393,10 @@ define i32 @bzhi32_c2_load(ptr %w, i32 %numlowbits) nounwind {
 define i32 @bzhi32_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi32_c3_load_indexzext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32
+; CHECK-NEXT:    mov w8, #32 // =0x20
 ; CHECK-NEXT:    ldr w9, [x0]
 ; CHECK-NEXT:    sub w8, w8, w1
-; CHECK-NEXT:    mov w10, #-1
+; CHECK-NEXT:    mov w10, #-1 // =0xffffffff
 ; CHECK-NEXT:    lsr w8, w10, w8
 ; CHECK-NEXT:    and w0, w8, w9
 ; CHECK-NEXT:    ret
@@ -396,7 +412,7 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi32_c4_commutative:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    neg w8, w1
-; CHECK-NEXT:    mov w9, #-1
+; CHECK-NEXT:    mov w9, #-1 // =0xffffffff
 ; CHECK-NEXT:    lsr w8, w9, w8
 ; CHECK-NEXT:    and w0, w0, w8
 ; CHECK-NEXT:    ret
@@ -412,7 +428,7 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_c0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    neg x8, x1
-; CHECK-NEXT:    mov x9, #-1
+; CHECK-NEXT:    mov x9, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    lsr x8, x9, x8
 ; CHECK-NEXT:    and x0, x8, x0
 ; CHECK-NEXT:    ret
@@ -425,8 +441,8 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
 define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_c1_indexzext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64
-; CHECK-NEXT:    mov x9, #-1
+; CHECK-NEXT:    mov w8, #64 // =0x40
+; CHECK-NEXT:    mov x9, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    sub w8, w8, w1
 ; CHECK-NEXT:    lsr x8, x9, x8
 ; CHECK-NEXT:    and x0, x8, x0
@@ -443,7 +459,7 @@ define i64 @bzhi64_c2_load(ptr %w, i64 %numlowbits) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    neg x8, x1
 ; CHECK-NEXT:    ldr x9, [x0]
-; CHECK-NEXT:    mov x10, #-1
+; CHECK-NEXT:    mov x10, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    lsr x8, x10, x8
 ; CHECK-NEXT:    and x0, x8, x9
 ; CHECK-NEXT:    ret
@@ -457,10 +473,10 @@ define i64 @bzhi64_c2_load(ptr %w, i64 %numlowbits) nounwind {
 define i64 @bzhi64_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_c3_load_indexzext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64
+; CHECK-NEXT:    mov w8, #64 // =0x40
 ; CHECK-NEXT:    ldr x9, [x0]
 ; CHECK-NEXT:    sub w8, w8, w1
-; CHECK-NEXT:    mov x10, #-1
+; CHECK-NEXT:    mov x10, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    lsr x8, x10, x8
 ; CHECK-NEXT:    and x0, x8, x9
 ; CHECK-NEXT:    ret
@@ -476,7 +492,7 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_c4_commutative:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    neg x8, x1
-; CHECK-NEXT:    mov x9, #-1
+; CHECK-NEXT:    mov x9, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    lsr x8, x9, x8
 ; CHECK-NEXT:    and x0, x0, x8
 ; CHECK-NEXT:    ret
@@ -506,7 +522,7 @@ define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
 define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi32_d1_indexzext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32
+; CHECK-NEXT:    mov w8, #32 // =0x20
 ; CHECK-NEXT:    sub w8, w8, w1
 ; CHECK-NEXT:    lsl w9, w0, w8
 ; CHECK-NEXT:    lsr w0, w9, w8
@@ -536,7 +552,7 @@ define i32 @bzhi32_d2_load(ptr %w, i32 %numlowbits) nounwind {
 define i32 @bzhi32_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi32_d3_load_indexzext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #32
+; CHECK-NEXT:    mov w8, #32 // =0x20
 ; CHECK-NEXT:    ldr w9, [x0]
 ; CHECK-NEXT:    sub w8, w8, w1
 ; CHECK-NEXT:    lsl w9, w9, w8
@@ -568,7 +584,7 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
 define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_d1_indexzext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64
+; CHECK-NEXT:    mov w8, #64 // =0x40
 ; CHECK-NEXT:    sub w8, w8, w1
 ; CHECK-NEXT:    lsl x9, x0, x8
 ; CHECK-NEXT:    lsr x0, x9, x8
@@ -598,7 +614,7 @@ define i64 @bzhi64_d2_load(ptr %w, i64 %numlowbits) nounwind {
 define i64 @bzhi64_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_d3_load_indexzext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #64
+; CHECK-NEXT:    mov w8, #64 // =0x40
 ; CHECK-NEXT:    ldr x9, [x0]
 ; CHECK-NEXT:    sub w8, w8, w1
 ; CHECK-NEXT:    lsl x9, x9, x8

diff  --git a/llvm/test/CodeGen/X86/extract-lowbits.ll b/llvm/test/CodeGen/X86/extract-lowbits.ll
index 823e26d63e15a4..743e305117b756 100644
--- a/llvm/test/CodeGen/X86/extract-lowbits.ll
+++ b/llvm/test/CodeGen/X86/extract-lowbits.ll
@@ -368,8 +368,9 @@ define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind {
   ret i64 %masked
 }
 
-define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
-; X86-NOBMI-LABEL: bzhi64_a1_indexzext:
+; Check that we don't throw away the vreg_width-1 mask if not using shifts
+define i64 @bzhi64_a0_masked(i64 %val, i64 %numlowbits) nounwind {
+; X86-NOBMI-LABEL: bzhi64_a0_masked:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    movl $1, %eax
@@ -388,7 +389,7 @@ define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1-LABEL: bzhi64_a1_indexzext:
+; X86-BMI1-LABEL: bzhi64_a0_masked:
 ; X86-BMI1:       # %bb.0:
 ; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI1-NEXT:    movl $1, %eax
@@ -407,7 +408,7 @@ define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI2-LABEL: bzhi64_a1_indexzext:
+; X86-BMI2-LABEL: bzhi64_a0_masked:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    movl $1, %eax
@@ -426,6 +427,93 @@ define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    retl
 ;
+; X64-NOBMI-LABEL: bzhi64_a0_masked:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    movl $1, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI-NEXT:    shlq %cl, %rax
+; X64-NOBMI-NEXT:    decq %rax
+; X64-NOBMI-NEXT:    andq %rdi, %rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI1-LABEL: bzhi64_a0_masked:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    andb $63, %sil
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bzhi64_a0_masked:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    andb $63, %sil
+; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    retq
+  %numlowbits.masked = and i64 %numlowbits, 63
+  %onebit = shl i64 1, %numlowbits.masked
+  %mask = add nsw i64 %onebit, -1
+  %masked = and i64 %mask, %val
+  ret i64 %masked
+}
+
+define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
+; X86-NOBMI-LABEL: bzhi64_a1_indexzext:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl $1, %eax
+; X86-NOBMI-NEXT:    xorl %edx, %edx
+; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
+; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    testb $32, %cl
+; X86-NOBMI-NEXT:    je .LBB7_2
+; X86-NOBMI-NEXT:  # %bb.1:
+; X86-NOBMI-NEXT:    movl %eax, %edx
+; X86-NOBMI-NEXT:    xorl %eax, %eax
+; X86-NOBMI-NEXT:  .LBB7_2:
+; X86-NOBMI-NEXT:    addl $-1, %eax
+; X86-NOBMI-NEXT:    adcl $-1, %edx
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI1-LABEL: bzhi64_a1_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movl $1, %eax
+; X86-BMI1-NEXT:    xorl %edx, %edx
+; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB7_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:  .LBB7_2:
+; X86-BMI1-NEXT:    addl $-1, %eax
+; X86-BMI1-NEXT:    adcl $-1, %edx
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_a1_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movl $1, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB7_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB7_2:
+; X86-BMI2-NEXT:    addl $-1, %eax
+; X86-BMI2-NEXT:    adcl $-1, %edx
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
+;
 ; X64-NOBMI-LABEL: bzhi64_a1_indexzext:
 ; X64-NOBMI:       # %bb.0:
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
@@ -466,11 +554,11 @@ define i64 @bzhi64_a2_load(ptr %w, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB7_2
+; X86-NOBMI-NEXT:    je .LBB8_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB7_2:
+; X86-NOBMI-NEXT:  .LBB8_2:
 ; X86-NOBMI-NEXT:    addl $-1, %eax
 ; X86-NOBMI-NEXT:    adcl $-1, %edx
 ; X86-NOBMI-NEXT:    andl 4(%esi), %edx
@@ -488,11 +576,11 @@ define i64 @bzhi64_a2_load(ptr %w, i64 %numlowbits) nounwind {
 ; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
 ; X86-BMI1-NEXT:    shll %cl, %eax
 ; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    je .LBB7_2
+; X86-BMI1-NEXT:    je .LBB8_2
 ; X86-BMI1-NEXT:  # %bb.1:
 ; X86-BMI1-NEXT:    movl %eax, %edx
 ; X86-BMI1-NEXT:    xorl %eax, %eax
-; X86-BMI1-NEXT:  .LBB7_2:
+; X86-BMI1-NEXT:  .LBB8_2:
 ; X86-BMI1-NEXT:    addl $-1, %eax
 ; X86-BMI1-NEXT:    adcl $-1, %edx
 ; X86-BMI1-NEXT:    andl 4(%esi), %edx
@@ -510,11 +598,11 @@ define i64 @bzhi64_a2_load(ptr %w, i64 %numlowbits) nounwind {
 ; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
 ; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
 ; X86-BMI2-NEXT:    testb $32, %cl
-; X86-BMI2-NEXT:    je .LBB7_2
+; X86-BMI2-NEXT:    je .LBB8_2
 ; X86-BMI2-NEXT:  # %bb.1:
 ; X86-BMI2-NEXT:    movl %eax, %edx
 ; X86-BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI2-NEXT:  .LBB7_2:
+; X86-BMI2-NEXT:  .LBB8_2:
 ; X86-BMI2-NEXT:    addl $-1, %eax
 ; X86-BMI2-NEXT:    adcl $-1, %edx
 ; X86-BMI2-NEXT:    andl 4(%esi), %edx
@@ -560,11 +648,11 @@ define i64 @bzhi64_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB8_2
+; X86-NOBMI-NEXT:    je .LBB9_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB8_2:
+; X86-NOBMI-NEXT:  .LBB9_2:
 ; X86-NOBMI-NEXT:    addl $-1, %eax
 ; X86-NOBMI-NEXT:    adcl $-1, %edx
 ; X86-NOBMI-NEXT:    andl 4(%esi), %edx
@@ -582,11 +670,11 @@ define i64 @bzhi64_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
 ; X86-BMI1-NEXT:    shll %cl, %eax
 ; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    je .LBB8_2
+; X86-BMI1-NEXT:    je .LBB9_2
 ; X86-BMI1-NEXT:  # %bb.1:
 ; X86-BMI1-NEXT:    movl %eax, %edx
 ; X86-BMI1-NEXT:    xorl %eax, %eax
-; X86-BMI1-NEXT:  .LBB8_2:
+; X86-BMI1-NEXT:  .LBB9_2:
 ; X86-BMI1-NEXT:    addl $-1, %eax
 ; X86-BMI1-NEXT:    adcl $-1, %edx
 ; X86-BMI1-NEXT:    andl 4(%esi), %edx
@@ -604,11 +692,11 @@ define i64 @bzhi64_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
 ; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
 ; X86-BMI2-NEXT:    testb $32, %cl
-; X86-BMI2-NEXT:    je .LBB8_2
+; X86-BMI2-NEXT:    je .LBB9_2
 ; X86-BMI2-NEXT:  # %bb.1:
 ; X86-BMI2-NEXT:    movl %eax, %edx
 ; X86-BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI2-NEXT:  .LBB8_2:
+; X86-BMI2-NEXT:  .LBB9_2:
 ; X86-BMI2-NEXT:    addl $-1, %eax
 ; X86-BMI2-NEXT:    adcl $-1, %edx
 ; X86-BMI2-NEXT:    andl 4(%esi), %edx
@@ -655,11 +743,11 @@ define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB9_2
+; X86-NOBMI-NEXT:    je .LBB10_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:  .LBB9_2:
+; X86-NOBMI-NEXT:  .LBB10_2:
 ; X86-NOBMI-NEXT:    addl $-1, %eax
 ; X86-NOBMI-NEXT:    adcl $-1, %edx
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
@@ -674,11 +762,11 @@ define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
 ; X86-BMI1-NEXT:    shll %cl, %eax
 ; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    je .LBB9_2
+; X86-BMI1-NEXT:    je .LBB10_2
 ; X86-BMI1-NEXT:  # %bb.1:
 ; X86-BMI1-NEXT:    movl %eax, %edx
 ; X86-BMI1-NEXT:    xorl %eax, %eax
-; X86-BMI1-NEXT:  .LBB9_2:
+; X86-BMI1-NEXT:  .LBB10_2:
 ; X86-BMI1-NEXT:    addl $-1, %eax
 ; X86-BMI1-NEXT:    adcl $-1, %edx
 ; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
@@ -693,11 +781,11 @@ define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
 ; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
 ; X86-BMI2-NEXT:    testb $32, %cl
-; X86-BMI2-NEXT:    je .LBB9_2
+; X86-BMI2-NEXT:    je .LBB10_2
 ; X86-BMI2-NEXT:  # %bb.1:
 ; X86-BMI2-NEXT:    movl %eax, %edx
 ; X86-BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI2-NEXT:  .LBB9_2:
+; X86-BMI2-NEXT:  .LBB10_2:
 ; X86-BMI2-NEXT:    addl $-1, %eax
 ; X86-BMI2-NEXT:    adcl $-1, %edx
 ; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
@@ -741,10 +829,10 @@ define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB10_2
+; X86-NOBMI-NEXT:    jne .LBB11_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:  .LBB10_2:
+; X86-NOBMI-NEXT:  .LBB11_2:
 ; X86-NOBMI-NEXT:    decl %eax
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
@@ -756,10 +844,10 @@ define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1-NEXT:    shll %cl, %edx
 ; X86-BMI1-NEXT:    xorl %eax, %eax
 ; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    jne .LBB10_2
+; X86-BMI1-NEXT:    jne .LBB11_2
 ; X86-BMI1-NEXT:  # %bb.1:
 ; X86-BMI1-NEXT:    movl %edx, %eax
-; X86-BMI1-NEXT:  .LBB10_2:
+; X86-BMI1-NEXT:  .LBB11_2:
 ; X86-BMI1-NEXT:    decl %eax
 ; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    retl
@@ -769,11 +857,11 @@ define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    xorl %eax, %eax
 ; X86-BMI2-NEXT:    testb $32, %cl
-; X86-BMI2-NEXT:    jne .LBB10_2
+; X86-BMI2-NEXT:    jne .LBB11_2
 ; X86-BMI2-NEXT:  # %bb.1:
 ; X86-BMI2-NEXT:    movl $1, %eax
 ; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
-; X86-BMI2-NEXT:  .LBB10_2:
+; X86-BMI2-NEXT:  .LBB11_2:
 ; X86-BMI2-NEXT:    decl %eax
 ; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    retl
@@ -985,10 +1073,10 @@ define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB14_2
+; X86-NOBMI-NEXT:    jne .LBB15_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:  .LBB14_2:
+; X86-NOBMI-NEXT:  .LBB15_2:
 ; X86-NOBMI-NEXT:    decl %eax
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
@@ -1000,10 +1088,10 @@ define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1-NEXT:    shll %cl, %edx
 ; X86-BMI1-NEXT:    xorl %eax, %eax
 ; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    jne .LBB14_2
+; X86-BMI1-NEXT:    jne .LBB15_2
 ; X86-BMI1-NEXT:  # %bb.1:
 ; X86-BMI1-NEXT:    movl %edx, %eax
-; X86-BMI1-NEXT:  .LBB14_2:
+; X86-BMI1-NEXT:  .LBB15_2:
 ; X86-BMI1-NEXT:    decl %eax
 ; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    retl
@@ -1013,11 +1101,11 @@ define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    xorl %eax, %eax
 ; X86-BMI2-NEXT:    testb $32, %cl
-; X86-BMI2-NEXT:    jne .LBB14_2
+; X86-BMI2-NEXT:    jne .LBB15_2
 ; X86-BMI2-NEXT:  # %bb.1:
 ; X86-BMI2-NEXT:    movl $1, %eax
 ; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
-; X86-BMI2-NEXT:  .LBB14_2:
+; X86-BMI2-NEXT:  .LBB15_2:
 ; X86-BMI2-NEXT:    decl %eax
 ; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    retl
@@ -1321,13 +1409,13 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %esi
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB20_1
+; X86-NOBMI-NEXT:    jne .LBB21_1
 ; X86-NOBMI-NEXT:  # %bb.2:
 ; X86-NOBMI-NEXT:    movl %esi, %eax
-; X86-NOBMI-NEXT:    jmp .LBB20_3
-; X86-NOBMI-NEXT:  .LBB20_1:
+; X86-NOBMI-NEXT:    jmp .LBB21_3
+; X86-NOBMI-NEXT:  .LBB21_1:
 ; X86-NOBMI-NEXT:    movl %esi, %edx
-; X86-NOBMI-NEXT:  .LBB20_3:
+; X86-NOBMI-NEXT:  .LBB21_3:
 ; X86-NOBMI-NEXT:    notl %edx
 ; X86-NOBMI-NEXT:    notl %eax
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
@@ -1342,11 +1430,11 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1-NEXT:    movl $-1, %eax
 ; X86-BMI1-NEXT:    shll %cl, %eax
 ; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    je .LBB20_2
+; X86-BMI1-NEXT:    je .LBB21_2
 ; X86-BMI1-NEXT:  # %bb.1:
 ; X86-BMI1-NEXT:    movl %eax, %edx
 ; X86-BMI1-NEXT:    xorl %eax, %eax
-; X86-BMI1-NEXT:  .LBB20_2:
+; X86-BMI1-NEXT:  .LBB21_2:
 ; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
 ; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %edx
 ; X86-BMI1-NEXT:    retl
@@ -1357,11 +1445,11 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI2-NEXT:    movl $-1, %ecx
 ; X86-BMI2-NEXT:    shlxl %edx, %ecx, %eax
 ; X86-BMI2-NEXT:    testb $32, %dl
-; X86-BMI2-NEXT:    je .LBB20_2
+; X86-BMI2-NEXT:    je .LBB21_2
 ; X86-BMI2-NEXT:  # %bb.1:
 ; X86-BMI2-NEXT:    movl %eax, %ecx
 ; X86-BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI2-NEXT:  .LBB20_2:
+; X86-BMI2-NEXT:  .LBB21_2:
 ; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
 ; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
 ; X86-BMI2-NEXT:    retl
@@ -1402,13 +1490,13 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %esi
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB21_1
+; X86-NOBMI-NEXT:    jne .LBB22_1
 ; X86-NOBMI-NEXT:  # %bb.2:
 ; X86-NOBMI-NEXT:    movl %esi, %eax
-; X86-NOBMI-NEXT:    jmp .LBB21_3
-; X86-NOBMI-NEXT:  .LBB21_1:
+; X86-NOBMI-NEXT:    jmp .LBB22_3
+; X86-NOBMI-NEXT:  .LBB22_1:
 ; X86-NOBMI-NEXT:    movl %esi, %edx
-; X86-NOBMI-NEXT:  .LBB21_3:
+; X86-NOBMI-NEXT:  .LBB22_3:
 ; X86-NOBMI-NEXT:    notl %edx
 ; X86-NOBMI-NEXT:    notl %eax
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
@@ -1423,11 +1511,11 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ; X86-BMI1-NEXT:    movl $-1, %eax
 ; X86-BMI1-NEXT:    shll %cl, %eax
 ; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    je .LBB21_2
+; X86-BMI1-NEXT:    je .LBB22_2
 ; X86-BMI1-NEXT:  # %bb.1:
 ; X86-BMI1-NEXT:    movl %eax, %edx
 ; X86-BMI1-NEXT:    xorl %eax, %eax
-; X86-BMI1-NEXT:  .LBB21_2:
+; X86-BMI1-NEXT:  .LBB22_2:
 ; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
 ; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %edx
 ; X86-BMI1-NEXT:    retl
@@ -1438,11 +1526,11 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ; X86-BMI2-NEXT:    movl $-1, %ecx
 ; X86-BMI2-NEXT:    shlxl %edx, %ecx, %eax
 ; X86-BMI2-NEXT:    testb $32, %dl
-; X86-BMI2-NEXT:    je .LBB21_2
+; X86-BMI2-NEXT:    je .LBB22_2
 ; X86-BMI2-NEXT:  # %bb.1:
 ; X86-BMI2-NEXT:    movl %eax, %ecx
 ; X86-BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI2-NEXT:  .LBB21_2:
+; X86-BMI2-NEXT:  .LBB22_2:
 ; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
 ; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
 ; X86-BMI2-NEXT:    retl
@@ -1488,13 +1576,13 @@ define i64 @bzhi64_b2_load(ptr %w, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %edi
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB22_1
+; X86-NOBMI-NEXT:    jne .LBB23_1
 ; X86-NOBMI-NEXT:  # %bb.2:
 ; X86-NOBMI-NEXT:    movl %edi, %eax
-; X86-NOBMI-NEXT:    jmp .LBB22_3
-; X86-NOBMI-NEXT:  .LBB22_1:
+; X86-NOBMI-NEXT:    jmp .LBB23_3
+; X86-NOBMI-NEXT:  .LBB23_1:
 ; X86-NOBMI-NEXT:    movl %edi, %edx
-; X86-NOBMI-NEXT:  .LBB22_3:
+; X86-NOBMI-NEXT:  .LBB23_3:
 ; X86-NOBMI-NEXT:    notl %edx
 ; X86-NOBMI-NEXT:    notl %eax
 ; X86-NOBMI-NEXT:    andl (%esi), %eax
@@ -1512,11 +1600,11 @@ define i64 @bzhi64_b2_load(ptr %w, i64 %numlowbits) nounwind {
 ; X86-BMI1-NEXT:    movl $-1, %eax
 ; X86-BMI1-NEXT:    shll %cl, %eax
 ; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    je .LBB22_2
+; X86-BMI1-NEXT:    je .LBB23_2
 ; X86-BMI1-NEXT:  # %bb.1:
 ; X86-BMI1-NEXT:    movl %eax, %esi
 ; X86-BMI1-NEXT:    xorl %eax, %eax
-; X86-BMI1-NEXT:  .LBB22_2:
+; X86-BMI1-NEXT:  .LBB23_2:
 ; X86-BMI1-NEXT:    andnl (%edx), %eax, %eax
 ; X86-BMI1-NEXT:    andnl 4(%edx), %esi, %edx
 ; X86-BMI1-NEXT:    popl %esi
@@ -1530,11 +1618,11 @@ define i64 @bzhi64_b2_load(ptr %w, i64 %numlowbits) nounwind {
 ; X86-BMI2-NEXT:    movl $-1, %edx
 ; X86-BMI2-NEXT:    shlxl %ebx, %edx, %eax
 ; X86-BMI2-NEXT:    testb $32, %bl
-; X86-BMI2-NEXT:    je .LBB22_2
+; X86-BMI2-NEXT:    je .LBB23_2
 ; X86-BMI2-NEXT:  # %bb.1:
 ; X86-BMI2-NEXT:    movl %eax, %edx
 ; X86-BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI2-NEXT:  .LBB22_2:
+; X86-BMI2-NEXT:  .LBB23_2:
 ; X86-BMI2-NEXT:    andnl (%ecx), %eax, %eax
 ; X86-BMI2-NEXT:    andnl 4(%ecx), %edx, %edx
 ; X86-BMI2-NEXT:    popl %ebx
@@ -1579,13 +1667,13 @@ define i64 @bzhi64_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %edi
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB23_1
+; X86-NOBMI-NEXT:    jne .LBB24_1
 ; X86-NOBMI-NEXT:  # %bb.2:
 ; X86-NOBMI-NEXT:    movl %edi, %eax
-; X86-NOBMI-NEXT:    jmp .LBB23_3
-; X86-NOBMI-NEXT:  .LBB23_1:
+; X86-NOBMI-NEXT:    jmp .LBB24_3
+; X86-NOBMI-NEXT:  .LBB24_1:
 ; X86-NOBMI-NEXT:    movl %edi, %edx
-; X86-NOBMI-NEXT:  .LBB23_3:
+; X86-NOBMI-NEXT:  .LBB24_3:
 ; X86-NOBMI-NEXT:    notl %edx
 ; X86-NOBMI-NEXT:    notl %eax
 ; X86-NOBMI-NEXT:    andl (%esi), %eax
@@ -1603,11 +1691,11 @@ define i64 @bzhi64_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; X86-BMI1-NEXT:    movl $-1, %eax
 ; X86-BMI1-NEXT:    shll %cl, %eax
 ; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    je .LBB23_2
+; X86-BMI1-NEXT:    je .LBB24_2
 ; X86-BMI1-NEXT:  # %bb.1:
 ; X86-BMI1-NEXT:    movl %eax, %esi
 ; X86-BMI1-NEXT:    xorl %eax, %eax
-; X86-BMI1-NEXT:  .LBB23_2:
+; X86-BMI1-NEXT:  .LBB24_2:
 ; X86-BMI1-NEXT:    andnl (%edx), %eax, %eax
 ; X86-BMI1-NEXT:    andnl 4(%edx), %esi, %edx
 ; X86-BMI1-NEXT:    popl %esi
@@ -1621,11 +1709,11 @@ define i64 @bzhi64_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; X86-BMI2-NEXT:    movl $-1, %edx
 ; X86-BMI2-NEXT:    shlxl %ebx, %edx, %eax
 ; X86-BMI2-NEXT:    testb $32, %bl
-; X86-BMI2-NEXT:    je .LBB23_2
+; X86-BMI2-NEXT:    je .LBB24_2
 ; X86-BMI2-NEXT:  # %bb.1:
 ; X86-BMI2-NEXT:    movl %eax, %edx
 ; X86-BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI2-NEXT:  .LBB23_2:
+; X86-BMI2-NEXT:  .LBB24_2:
 ; X86-BMI2-NEXT:    andnl (%ecx), %eax, %eax
 ; X86-BMI2-NEXT:    andnl 4(%ecx), %edx, %edx
 ; X86-BMI2-NEXT:    popl %ebx
@@ -1671,13 +1759,13 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %esi
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB24_1
+; X86-NOBMI-NEXT:    jne .LBB25_1
 ; X86-NOBMI-NEXT:  # %bb.2:
 ; X86-NOBMI-NEXT:    movl %esi, %eax
-; X86-NOBMI-NEXT:    jmp .LBB24_3
-; X86-NOBMI-NEXT:  .LBB24_1:
+; X86-NOBMI-NEXT:    jmp .LBB25_3
+; X86-NOBMI-NEXT:  .LBB25_1:
 ; X86-NOBMI-NEXT:    movl %esi, %edx
-; X86-NOBMI-NEXT:  .LBB24_3:
+; X86-NOBMI-NEXT:  .LBB25_3:
 ; X86-NOBMI-NEXT:    notl %edx
 ; X86-NOBMI-NEXT:    notl %eax
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
@@ -1692,11 +1780,11 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1-NEXT:    movl $-1, %eax
 ; X86-BMI1-NEXT:    shll %cl, %eax
 ; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    je .LBB24_2
+; X86-BMI1-NEXT:    je .LBB25_2
 ; X86-BMI1-NEXT:  # %bb.1:
 ; X86-BMI1-NEXT:    movl %eax, %edx
 ; X86-BMI1-NEXT:    xorl %eax, %eax
-; X86-BMI1-NEXT:  .LBB24_2:
+; X86-BMI1-NEXT:  .LBB25_2:
 ; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
 ; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %edx
 ; X86-BMI1-NEXT:    retl
@@ -1707,11 +1795,11 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI2-NEXT:    movl $-1, %ecx
 ; X86-BMI2-NEXT:    shlxl %edx, %ecx, %eax
 ; X86-BMI2-NEXT:    testb $32, %dl
-; X86-BMI2-NEXT:    je .LBB24_2
+; X86-BMI2-NEXT:    je .LBB25_2
 ; X86-BMI2-NEXT:  # %bb.1:
 ; X86-BMI2-NEXT:    movl %eax, %ecx
 ; X86-BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI2-NEXT:  .LBB24_2:
+; X86-BMI2-NEXT:  .LBB25_2:
 ; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
 ; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
 ; X86-BMI2-NEXT:    retl
@@ -1753,10 +1841,10 @@ define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB25_2
+; X86-NOBMI-NEXT:    jne .LBB26_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:  .LBB25_2:
+; X86-NOBMI-NEXT:  .LBB26_2:
 ; X86-NOBMI-NEXT:    notl %eax
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
@@ -1768,10 +1856,10 @@ define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind {
 ; X86-BMI1-NEXT:    shll %cl, %eax
 ; X86-BMI1-NEXT:    xorl %edx, %edx
 ; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    jne .LBB25_2
+; X86-BMI1-NEXT:    jne .LBB26_2
 ; X86-BMI1-NEXT:  # %bb.1:
 ; X86-BMI1-NEXT:    movl %eax, %edx
-; X86-BMI1-NEXT:  .LBB25_2:
+; X86-BMI1-NEXT:  .LBB26_2:
 ; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %eax
 ; X86-BMI1-NEXT:    retl
 ;
@@ -1780,11 +1868,11 @@ define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind {
 ; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    xorl %ecx, %ecx
 ; X86-BMI2-NEXT:    testb $32, %al
-; X86-BMI2-NEXT:    jne .LBB25_2
+; X86-BMI2-NEXT:    jne .LBB26_2
 ; X86-BMI2-NEXT:  # %bb.1:
 ; X86-BMI2-NEXT:    movl $-1, %ecx
 ; X86-BMI2-NEXT:    shlxl %eax, %ecx, %ecx
-; X86-BMI2-NEXT:  .LBB25_2:
+; X86-BMI2-NEXT:  .LBB26_2:
 ; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %eax
 ; X86-BMI2-NEXT:    retl
 ;
@@ -1933,10 +2021,10 @@ define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %edx
 ; X86-NOBMI-NEXT:    xorl %eax, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB28_2
+; X86-NOBMI-NEXT:    jne .LBB29_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:  .LBB28_2:
+; X86-NOBMI-NEXT:  .LBB29_2:
 ; X86-NOBMI-NEXT:    notl %eax
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
@@ -1948,10 +2036,10 @@ define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind {
 ; X86-BMI1-NEXT:    shll %cl, %eax
 ; X86-BMI1-NEXT:    xorl %edx, %edx
 ; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    jne .LBB28_2
+; X86-BMI1-NEXT:    jne .LBB29_2
 ; X86-BMI1-NEXT:  # %bb.1:
 ; X86-BMI1-NEXT:    movl %eax, %edx
-; X86-BMI1-NEXT:  .LBB28_2:
+; X86-BMI1-NEXT:  .LBB29_2:
 ; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %eax
 ; X86-BMI1-NEXT:    retl
 ;
@@ -1960,11 +2048,11 @@ define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind {
 ; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    xorl %ecx, %ecx
 ; X86-BMI2-NEXT:    testb $32, %al
-; X86-BMI2-NEXT:    jne .LBB28_2
+; X86-BMI2-NEXT:    jne .LBB29_2
 ; X86-BMI2-NEXT:  # %bb.1:
 ; X86-BMI2-NEXT:    movl $-1, %ecx
 ; X86-BMI2-NEXT:    shlxl %eax, %ecx, %ecx
-; X86-BMI2-NEXT:  .LBB28_2:
+; X86-BMI2-NEXT:  .LBB29_2:
 ; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %eax
 ; X86-BMI2-NEXT:    retl
 ;
@@ -2426,11 +2514,11 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits, ptr %escape) nounwind {
 ; X86-NOBMI-NEXT:    movl $-1, %edx
 ; X86-NOBMI-NEXT:    shrl %cl, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB34_2
+; X86-NOBMI-NEXT:    je .LBB35_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
 ; X86-NOBMI-NEXT:    xorl %edx, %edx
-; X86-NOBMI-NEXT:  .LBB34_2:
+; X86-NOBMI-NEXT:  .LBB35_2:
 ; X86-NOBMI-NEXT:    movl %edx, 4(%esi)
 ; X86-NOBMI-NEXT:    movl %eax, (%esi)
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
@@ -2448,11 +2536,11 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits, ptr %escape) nounwind {
 ; X86-BMI1-NEXT:    movl $-1, %edx
 ; X86-BMI1-NEXT:    shrl %cl, %edx
 ; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    je .LBB34_2
+; X86-BMI1-NEXT:    je .LBB35_2
 ; X86-BMI1-NEXT:  # %bb.1:
 ; X86-BMI1-NEXT:    movl %edx, %eax
 ; X86-BMI1-NEXT:    xorl %edx, %edx
-; X86-BMI1-NEXT:  .LBB34_2:
+; X86-BMI1-NEXT:  .LBB35_2:
 ; X86-BMI1-NEXT:    movl %edx, 4(%esi)
 ; X86-BMI1-NEXT:    movl %eax, (%esi)
 ; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
@@ -2469,11 +2557,11 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits, ptr %escape) nounwind {
 ; X86-BMI2-NEXT:    movl $-1, %eax
 ; X86-BMI2-NEXT:    shrxl %ebx, %eax, %edx
 ; X86-BMI2-NEXT:    testb $32, %bl
-; X86-BMI2-NEXT:    je .LBB34_2
+; X86-BMI2-NEXT:    je .LBB35_2
 ; X86-BMI2-NEXT:  # %bb.1:
 ; X86-BMI2-NEXT:    movl %edx, %eax
 ; X86-BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI2-NEXT:  .LBB34_2:
+; X86-BMI2-NEXT:  .LBB35_2:
 ; X86-BMI2-NEXT:    movl %edx, 4(%ecx)
 ; X86-BMI2-NEXT:    movl %eax, (%ecx)
 ; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
@@ -2530,11 +2618,11 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits, ptr %escape) nounwind
 ; X86-NOBMI-NEXT:    movl $-1, %edx
 ; X86-NOBMI-NEXT:    shrl %cl, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB35_2
+; X86-NOBMI-NEXT:    je .LBB36_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
 ; X86-NOBMI-NEXT:    xorl %edx, %edx
-; X86-NOBMI-NEXT:  .LBB35_2:
+; X86-NOBMI-NEXT:  .LBB36_2:
 ; X86-NOBMI-NEXT:    movl %edx, 4(%esi)
 ; X86-NOBMI-NEXT:    movl %eax, (%esi)
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
@@ -2552,11 +2640,11 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits, ptr %escape) nounwind
 ; X86-BMI1-NEXT:    movl $-1, %edx
 ; X86-BMI1-NEXT:    shrl %cl, %edx
 ; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    je .LBB35_2
+; X86-BMI1-NEXT:    je .LBB36_2
 ; X86-BMI1-NEXT:  # %bb.1:
 ; X86-BMI1-NEXT:    movl %edx, %eax
 ; X86-BMI1-NEXT:    xorl %edx, %edx
-; X86-BMI1-NEXT:  .LBB35_2:
+; X86-BMI1-NEXT:  .LBB36_2:
 ; X86-BMI1-NEXT:    movl %edx, 4(%esi)
 ; X86-BMI1-NEXT:    movl %eax, (%esi)
 ; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
@@ -2573,11 +2661,11 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits, ptr %escape) nounwind
 ; X86-BMI2-NEXT:    movl $-1, %eax
 ; X86-BMI2-NEXT:    shrxl %ebx, %eax, %edx
 ; X86-BMI2-NEXT:    testb $32, %bl
-; X86-BMI2-NEXT:    je .LBB35_2
+; X86-BMI2-NEXT:    je .LBB36_2
 ; X86-BMI2-NEXT:  # %bb.1:
 ; X86-BMI2-NEXT:    movl %edx, %eax
 ; X86-BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI2-NEXT:  .LBB35_2:
+; X86-BMI2-NEXT:  .LBB36_2:
 ; X86-BMI2-NEXT:    movl %edx, 4(%ecx)
 ; X86-BMI2-NEXT:    movl %eax, (%ecx)
 ; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
@@ -2639,11 +2727,11 @@ define i64 @bzhi64_c2_load(ptr %w, i64 %numlowbits, ptr %escape) nounwind {
 ; X86-NOBMI-NEXT:    movl $-1, %ebx
 ; X86-NOBMI-NEXT:    shrl %cl, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB36_2
+; X86-NOBMI-NEXT:    je .LBB37_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %ebx, %edi
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
-; X86-NOBMI-NEXT:  .LBB36_2:
+; X86-NOBMI-NEXT:  .LBB37_2:
 ; X86-NOBMI-NEXT:    movl 4(%eax), %edx
 ; X86-NOBMI-NEXT:    andl %ebx, %edx
 ; X86-NOBMI-NEXT:    movl (%eax), %eax
@@ -2668,11 +2756,11 @@ define i64 @bzhi64_c2_load(ptr %w, i64 %numlowbits, ptr %escape) nounwind {
 ; X86-BMI1-NEXT:    movl $-1, %ebx
 ; X86-BMI1-NEXT:    shrl %cl, %ebx
 ; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    je .LBB36_2
+; X86-BMI1-NEXT:    je .LBB37_2
 ; X86-BMI1-NEXT:  # %bb.1:
 ; X86-BMI1-NEXT:    movl %ebx, %edi
 ; X86-BMI1-NEXT:    xorl %ebx, %ebx
-; X86-BMI1-NEXT:  .LBB36_2:
+; X86-BMI1-NEXT:  .LBB37_2:
 ; X86-BMI1-NEXT:    movl 4(%eax), %edx
 ; X86-BMI1-NEXT:    andl %ebx, %edx
 ; X86-BMI1-NEXT:    movl (%eax), %eax
@@ -2695,11 +2783,11 @@ define i64 @bzhi64_c2_load(ptr %w, i64 %numlowbits, ptr %escape) nounwind {
 ; X86-BMI2-NEXT:    movl $-1, %esi
 ; X86-BMI2-NEXT:    shrxl %edx, %esi, %edi
 ; X86-BMI2-NEXT:    testb $32, %dl
-; X86-BMI2-NEXT:    je .LBB36_2
+; X86-BMI2-NEXT:    je .LBB37_2
 ; X86-BMI2-NEXT:  # %bb.1:
 ; X86-BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI2-NEXT:  .LBB36_2:
+; X86-BMI2-NEXT:  .LBB37_2:
 ; X86-BMI2-NEXT:    movl 4(%eax), %edx
 ; X86-BMI2-NEXT:    andl %edi, %edx
 ; X86-BMI2-NEXT:    movl (%eax), %eax
@@ -2765,11 +2853,11 @@ define i64 @bzhi64_c3_load_indexzext(ptr %w, i8 %numlowbits, ptr %escape) nounwi
 ; X86-NOBMI-NEXT:    movl $-1, %ebx
 ; X86-NOBMI-NEXT:    shrl %cl, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB37_2
+; X86-NOBMI-NEXT:    je .LBB38_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %ebx, %edi
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
-; X86-NOBMI-NEXT:  .LBB37_2:
+; X86-NOBMI-NEXT:  .LBB38_2:
 ; X86-NOBMI-NEXT:    movl 4(%eax), %edx
 ; X86-NOBMI-NEXT:    andl %ebx, %edx
 ; X86-NOBMI-NEXT:    movl (%eax), %eax
@@ -2794,11 +2882,11 @@ define i64 @bzhi64_c3_load_indexzext(ptr %w, i8 %numlowbits, ptr %escape) nounwi
 ; X86-BMI1-NEXT:    movl $-1, %ebx
 ; X86-BMI1-NEXT:    shrl %cl, %ebx
 ; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    je .LBB37_2
+; X86-BMI1-NEXT:    je .LBB38_2
 ; X86-BMI1-NEXT:  # %bb.1:
 ; X86-BMI1-NEXT:    movl %ebx, %edi
 ; X86-BMI1-NEXT:    xorl %ebx, %ebx
-; X86-BMI1-NEXT:  .LBB37_2:
+; X86-BMI1-NEXT:  .LBB38_2:
 ; X86-BMI1-NEXT:    movl 4(%eax), %edx
 ; X86-BMI1-NEXT:    andl %ebx, %edx
 ; X86-BMI1-NEXT:    movl (%eax), %eax
@@ -2821,11 +2909,11 @@ define i64 @bzhi64_c3_load_indexzext(ptr %w, i8 %numlowbits, ptr %escape) nounwi
 ; X86-BMI2-NEXT:    movl $-1, %esi
 ; X86-BMI2-NEXT:    shrxl %edx, %esi, %edi
 ; X86-BMI2-NEXT:    testb $32, %dl
-; X86-BMI2-NEXT:    je .LBB37_2
+; X86-BMI2-NEXT:    je .LBB38_2
 ; X86-BMI2-NEXT:  # %bb.1:
 ; X86-BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI2-NEXT:  .LBB37_2:
+; X86-BMI2-NEXT:  .LBB38_2:
 ; X86-BMI2-NEXT:    movl 4(%eax), %edx
 ; X86-BMI2-NEXT:    andl %edi, %edx
 ; X86-BMI2-NEXT:    movl (%eax), %eax
@@ -2890,11 +2978,11 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits, ptr %escape) nounwi
 ; X86-NOBMI-NEXT:    movl $-1, %edx
 ; X86-NOBMI-NEXT:    shrl %cl, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB38_2
+; X86-NOBMI-NEXT:    je .LBB39_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
 ; X86-NOBMI-NEXT:    xorl %edx, %edx
-; X86-NOBMI-NEXT:  .LBB38_2:
+; X86-NOBMI-NEXT:  .LBB39_2:
 ; X86-NOBMI-NEXT:    movl %edx, 4(%esi)
 ; X86-NOBMI-NEXT:    movl %eax, (%esi)
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
@@ -2912,11 +3000,11 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits, ptr %escape) nounwi
 ; X86-BMI1-NEXT:    movl $-1, %edx
 ; X86-BMI1-NEXT:    shrl %cl, %edx
 ; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    je .LBB38_2
+; X86-BMI1-NEXT:    je .LBB39_2
 ; X86-BMI1-NEXT:  # %bb.1:
 ; X86-BMI1-NEXT:    movl %edx, %eax
 ; X86-BMI1-NEXT:    xorl %edx, %edx
-; X86-BMI1-NEXT:  .LBB38_2:
+; X86-BMI1-NEXT:  .LBB39_2:
 ; X86-BMI1-NEXT:    movl %edx, 4(%esi)
 ; X86-BMI1-NEXT:    movl %eax, (%esi)
 ; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
@@ -2933,11 +3021,11 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits, ptr %escape) nounwi
 ; X86-BMI2-NEXT:    movl $-1, %eax
 ; X86-BMI2-NEXT:    shrxl %ebx, %eax, %edx
 ; X86-BMI2-NEXT:    testb $32, %bl
-; X86-BMI2-NEXT:    je .LBB38_2
+; X86-BMI2-NEXT:    je .LBB39_2
 ; X86-BMI2-NEXT:  # %bb.1:
 ; X86-BMI2-NEXT:    movl %edx, %eax
 ; X86-BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI2-NEXT:  .LBB38_2:
+; X86-BMI2-NEXT:  .LBB39_2:
 ; X86-BMI2-NEXT:    movl %edx, 4(%ecx)
 ; X86-BMI2-NEXT:    movl %eax, (%ecx)
 ; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
@@ -2994,10 +3082,10 @@ define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    movl $-1, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB39_2
+; X86-NOBMI-NEXT:    jne .LBB40_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl $-1, %eax
-; X86-NOBMI-NEXT:  .LBB39_2:
+; X86-NOBMI-NEXT:  .LBB40_2:
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
@@ -3008,10 +3096,10 @@ define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1-NEXT:    movl $-1, %eax
 ; X86-BMI1-NEXT:    shrl %cl, %eax
 ; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    jne .LBB39_2
+; X86-BMI1-NEXT:    jne .LBB40_2
 ; X86-BMI1-NEXT:  # %bb.1:
 ; X86-BMI1-NEXT:    movl $-1, %eax
-; X86-BMI1-NEXT:  .LBB39_2:
+; X86-BMI1-NEXT:  .LBB40_2:
 ; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1-NEXT:    retl
 ;
@@ -3021,10 +3109,10 @@ define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    movl $-1, %eax
 ; X86-BMI2-NEXT:    testb $32, %cl
-; X86-BMI2-NEXT:    je .LBB39_2
+; X86-BMI2-NEXT:    je .LBB40_2
 ; X86-BMI2-NEXT:  # %bb.1:
 ; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
-; X86-BMI2-NEXT:  .LBB39_2:
+; X86-BMI2-NEXT:  .LBB40_2:
 ; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    retl
 ;
@@ -3175,10 +3263,10 @@ define i32 @bzhi64_32_c3(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NEXT:    movl $-1, %edx
 ; X86-NEXT:    shrdl %cl, %eax, %edx
 ; X86-NEXT:    testb $32, %cl
-; X86-NEXT:    jne .LBB42_2
+; X86-NEXT:    jne .LBB43_2
 ; X86-NEXT:  # %bb.1:
 ; X86-NEXT:    movl %edx, %eax
-; X86-NEXT:  .LBB42_2:
+; X86-NEXT:  .LBB43_2:
 ; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
@@ -3450,26 +3538,26 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %esi, %edi
-; X86-NOBMI-NEXT:    jne .LBB47_2
+; X86-NOBMI-NEXT:    jne .LBB48_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
-; X86-NOBMI-NEXT:  .LBB47_2:
+; X86-NOBMI-NEXT:  .LBB48_2:
 ; X86-NOBMI-NEXT:    movl %edi, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB47_4
+; X86-NOBMI-NEXT:    jne .LBB48_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ebx
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB47_4:
+; X86-NOBMI-NEXT:  .LBB48_4:
 ; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB47_6
+; X86-NOBMI-NEXT:    jne .LBB48_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
-; X86-NOBMI-NEXT:  .LBB47_6:
+; X86-NOBMI-NEXT:  .LBB48_6:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -3489,26 +3577,26 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1-NEXT:    shldl %cl, %edx, %eax
 ; X86-BMI1-NEXT:    testb $32, %cl
 ; X86-BMI1-NEXT:    movl %esi, %edi
-; X86-BMI1-NEXT:    jne .LBB47_2
+; X86-BMI1-NEXT:    jne .LBB48_2
 ; X86-BMI1-NEXT:  # %bb.1:
 ; X86-BMI1-NEXT:    movl %eax, %edi
-; X86-BMI1-NEXT:  .LBB47_2:
+; X86-BMI1-NEXT:  .LBB48_2:
 ; X86-BMI1-NEXT:    movl %edi, %eax
 ; X86-BMI1-NEXT:    shrl %cl, %eax
 ; X86-BMI1-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1-NEXT:    testb $32, %cl
 ; X86-BMI1-NEXT:    movl $0, %edx
-; X86-BMI1-NEXT:    jne .LBB47_4
+; X86-BMI1-NEXT:    jne .LBB48_4
 ; X86-BMI1-NEXT:  # %bb.3:
 ; X86-BMI1-NEXT:    movl %esi, %ebx
 ; X86-BMI1-NEXT:    movl %eax, %edx
-; X86-BMI1-NEXT:  .LBB47_4:
+; X86-BMI1-NEXT:  .LBB48_4:
 ; X86-BMI1-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    jne .LBB47_6
+; X86-BMI1-NEXT:    jne .LBB48_6
 ; X86-BMI1-NEXT:  # %bb.5:
 ; X86-BMI1-NEXT:    movl %ebx, %eax
-; X86-BMI1-NEXT:  .LBB47_6:
+; X86-BMI1-NEXT:  .LBB48_6:
 ; X86-BMI1-NEXT:    popl %esi
 ; X86-BMI1-NEXT:    popl %edi
 ; X86-BMI1-NEXT:    popl %ebx
@@ -3526,22 +3614,22 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI2-NEXT:    testb $32, %cl
-; X86-BMI2-NEXT:    je .LBB47_2
+; X86-BMI2-NEXT:    je .LBB48_2
 ; X86-BMI2-NEXT:  # %bb.1:
 ; X86-BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI2-NEXT:    movl $0, %edi
-; X86-BMI2-NEXT:  .LBB47_2:
+; X86-BMI2-NEXT:  .LBB48_2:
 ; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI2-NEXT:    jne .LBB47_4
+; X86-BMI2-NEXT:    jne .LBB48_4
 ; X86-BMI2-NEXT:  # %bb.3:
 ; X86-BMI2-NEXT:    movl %eax, %edx
-; X86-BMI2-NEXT:  .LBB47_4:
+; X86-BMI2-NEXT:  .LBB48_4:
 ; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI2-NEXT:    testb $32, %cl
-; X86-BMI2-NEXT:    jne .LBB47_6
+; X86-BMI2-NEXT:    jne .LBB48_6
 ; X86-BMI2-NEXT:  # %bb.5:
 ; X86-BMI2-NEXT:    movl %edi, %eax
-; X86-BMI2-NEXT:  .LBB47_6:
+; X86-BMI2-NEXT:  .LBB48_6:
 ; X86-BMI2-NEXT:    popl %esi
 ; X86-BMI2-NEXT:    popl %edi
 ; X86-BMI2-NEXT:    retl
@@ -3587,26 +3675,26 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %esi, %edi
-; X86-NOBMI-NEXT:    jne .LBB48_2
+; X86-NOBMI-NEXT:    jne .LBB49_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
-; X86-NOBMI-NEXT:  .LBB48_2:
+; X86-NOBMI-NEXT:  .LBB49_2:
 ; X86-NOBMI-NEXT:    movl %edi, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB48_4
+; X86-NOBMI-NEXT:    jne .LBB49_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ebx
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB48_4:
+; X86-NOBMI-NEXT:  .LBB49_4:
 ; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB48_6
+; X86-NOBMI-NEXT:    jne .LBB49_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
-; X86-NOBMI-NEXT:  .LBB48_6:
+; X86-NOBMI-NEXT:  .LBB49_6:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -3626,26 +3714,26 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-BMI1-NEXT:    shldl %cl, %edx, %eax
 ; X86-BMI1-NEXT:    testb $32, %cl
 ; X86-BMI1-NEXT:    movl %esi, %edi
-; X86-BMI1-NEXT:    jne .LBB48_2
+; X86-BMI1-NEXT:    jne .LBB49_2
 ; X86-BMI1-NEXT:  # %bb.1:
 ; X86-BMI1-NEXT:    movl %eax, %edi
-; X86-BMI1-NEXT:  .LBB48_2:
+; X86-BMI1-NEXT:  .LBB49_2:
 ; X86-BMI1-NEXT:    movl %edi, %eax
 ; X86-BMI1-NEXT:    shrl %cl, %eax
 ; X86-BMI1-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1-NEXT:    testb $32, %cl
 ; X86-BMI1-NEXT:    movl $0, %edx
-; X86-BMI1-NEXT:    jne .LBB48_4
+; X86-BMI1-NEXT:    jne .LBB49_4
 ; X86-BMI1-NEXT:  # %bb.3:
 ; X86-BMI1-NEXT:    movl %esi, %ebx
 ; X86-BMI1-NEXT:    movl %eax, %edx
-; X86-BMI1-NEXT:  .LBB48_4:
+; X86-BMI1-NEXT:  .LBB49_4:
 ; X86-BMI1-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    jne .LBB48_6
+; X86-BMI1-NEXT:    jne .LBB49_6
 ; X86-BMI1-NEXT:  # %bb.5:
 ; X86-BMI1-NEXT:    movl %ebx, %eax
-; X86-BMI1-NEXT:  .LBB48_6:
+; X86-BMI1-NEXT:  .LBB49_6:
 ; X86-BMI1-NEXT:    popl %esi
 ; X86-BMI1-NEXT:    popl %edi
 ; X86-BMI1-NEXT:    popl %ebx
@@ -3663,22 +3751,22 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-BMI2-NEXT:    shlxl %ecx, %eax, %edi
 ; X86-BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI2-NEXT:    testb $32, %cl
-; X86-BMI2-NEXT:    je .LBB48_2
+; X86-BMI2-NEXT:    je .LBB49_2
 ; X86-BMI2-NEXT:  # %bb.1:
 ; X86-BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI2-NEXT:    movl $0, %edi
-; X86-BMI2-NEXT:  .LBB48_2:
+; X86-BMI2-NEXT:  .LBB49_2:
 ; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI2-NEXT:    jne .LBB48_4
+; X86-BMI2-NEXT:    jne .LBB49_4
 ; X86-BMI2-NEXT:  # %bb.3:
 ; X86-BMI2-NEXT:    movl %eax, %edx
-; X86-BMI2-NEXT:  .LBB48_4:
+; X86-BMI2-NEXT:  .LBB49_4:
 ; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI2-NEXT:    testb $32, %cl
-; X86-BMI2-NEXT:    jne .LBB48_6
+; X86-BMI2-NEXT:    jne .LBB49_6
 ; X86-BMI2-NEXT:  # %bb.5:
 ; X86-BMI2-NEXT:    movl %edi, %eax
-; X86-BMI2-NEXT:  .LBB48_6:
+; X86-BMI2-NEXT:  .LBB49_6:
 ; X86-BMI2-NEXT:    popl %esi
 ; X86-BMI2-NEXT:    popl %edi
 ; X86-BMI2-NEXT:    retl
@@ -3728,26 +3816,26 @@ define i64 @bzhi64_d2_load(ptr %w, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %esi, %edi
-; X86-NOBMI-NEXT:    jne .LBB49_2
+; X86-NOBMI-NEXT:    jne .LBB50_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
-; X86-NOBMI-NEXT:  .LBB49_2:
+; X86-NOBMI-NEXT:  .LBB50_2:
 ; X86-NOBMI-NEXT:    movl %edi, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB49_4
+; X86-NOBMI-NEXT:    jne .LBB50_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ebx
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB49_4:
+; X86-NOBMI-NEXT:  .LBB50_4:
 ; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB49_6
+; X86-NOBMI-NEXT:    jne .LBB50_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
-; X86-NOBMI-NEXT:  .LBB49_6:
+; X86-NOBMI-NEXT:  .LBB50_6:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -3768,26 +3856,26 @@ define i64 @bzhi64_d2_load(ptr %w, i64 %numlowbits) nounwind {
 ; X86-BMI1-NEXT:    shldl %cl, %edx, %eax
 ; X86-BMI1-NEXT:    testb $32, %cl
 ; X86-BMI1-NEXT:    movl %esi, %edi
-; X86-BMI1-NEXT:    jne .LBB49_2
+; X86-BMI1-NEXT:    jne .LBB50_2
 ; X86-BMI1-NEXT:  # %bb.1:
 ; X86-BMI1-NEXT:    movl %eax, %edi
-; X86-BMI1-NEXT:  .LBB49_2:
+; X86-BMI1-NEXT:  .LBB50_2:
 ; X86-BMI1-NEXT:    movl %edi, %eax
 ; X86-BMI1-NEXT:    shrl %cl, %eax
 ; X86-BMI1-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1-NEXT:    testb $32, %cl
 ; X86-BMI1-NEXT:    movl $0, %edx
-; X86-BMI1-NEXT:    jne .LBB49_4
+; X86-BMI1-NEXT:    jne .LBB50_4
 ; X86-BMI1-NEXT:  # %bb.3:
 ; X86-BMI1-NEXT:    movl %esi, %ebx
 ; X86-BMI1-NEXT:    movl %eax, %edx
-; X86-BMI1-NEXT:  .LBB49_4:
+; X86-BMI1-NEXT:  .LBB50_4:
 ; X86-BMI1-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    jne .LBB49_6
+; X86-BMI1-NEXT:    jne .LBB50_6
 ; X86-BMI1-NEXT:  # %bb.5:
 ; X86-BMI1-NEXT:    movl %ebx, %eax
-; X86-BMI1-NEXT:  .LBB49_6:
+; X86-BMI1-NEXT:  .LBB50_6:
 ; X86-BMI1-NEXT:    popl %esi
 ; X86-BMI1-NEXT:    popl %edi
 ; X86-BMI1-NEXT:    popl %ebx
@@ -3806,22 +3894,22 @@ define i64 @bzhi64_d2_load(ptr %w, i64 %numlowbits) nounwind {
 ; X86-BMI2-NEXT:    shlxl %ecx, %edx, %edi
 ; X86-BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI2-NEXT:    testb $32, %cl
-; X86-BMI2-NEXT:    je .LBB49_2
+; X86-BMI2-NEXT:    je .LBB50_2
 ; X86-BMI2-NEXT:  # %bb.1:
 ; X86-BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI2-NEXT:    movl $0, %edi
-; X86-BMI2-NEXT:  .LBB49_2:
+; X86-BMI2-NEXT:  .LBB50_2:
 ; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI2-NEXT:    jne .LBB49_4
+; X86-BMI2-NEXT:    jne .LBB50_4
 ; X86-BMI2-NEXT:  # %bb.3:
 ; X86-BMI2-NEXT:    movl %eax, %edx
-; X86-BMI2-NEXT:  .LBB49_4:
+; X86-BMI2-NEXT:  .LBB50_4:
 ; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI2-NEXT:    testb $32, %cl
-; X86-BMI2-NEXT:    jne .LBB49_6
+; X86-BMI2-NEXT:    jne .LBB50_6
 ; X86-BMI2-NEXT:  # %bb.5:
 ; X86-BMI2-NEXT:    movl %edi, %eax
-; X86-BMI2-NEXT:  .LBB49_6:
+; X86-BMI2-NEXT:  .LBB50_6:
 ; X86-BMI2-NEXT:    popl %esi
 ; X86-BMI2-NEXT:    popl %edi
 ; X86-BMI2-NEXT:    retl
@@ -3869,26 +3957,26 @@ define i64 @bzhi64_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl %esi, %edi
-; X86-NOBMI-NEXT:    jne .LBB50_2
+; X86-NOBMI-NEXT:    jne .LBB51_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %eax, %edi
-; X86-NOBMI-NEXT:  .LBB50_2:
+; X86-NOBMI-NEXT:  .LBB51_2:
 ; X86-NOBMI-NEXT:    movl %edi, %eax
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    movl $0, %edx
-; X86-NOBMI-NEXT:    jne .LBB50_4
+; X86-NOBMI-NEXT:    jne .LBB51_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %esi, %ebx
 ; X86-NOBMI-NEXT:    movl %eax, %edx
-; X86-NOBMI-NEXT:  .LBB50_4:
+; X86-NOBMI-NEXT:  .LBB51_4:
 ; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB50_6
+; X86-NOBMI-NEXT:    jne .LBB51_6
 ; X86-NOBMI-NEXT:  # %bb.5:
 ; X86-NOBMI-NEXT:    movl %ebx, %eax
-; X86-NOBMI-NEXT:  .LBB50_6:
+; X86-NOBMI-NEXT:  .LBB51_6:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    popl %ebx
@@ -3909,26 +3997,26 @@ define i64 @bzhi64_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
 ; X86-BMI1-NEXT:    shldl %cl, %edx, %eax
 ; X86-BMI1-NEXT:    testb $32, %cl
 ; X86-BMI1-NEXT:    movl %esi, %edi
-; X86-BMI1-NEXT:    jne .LBB50_2
+; X86-BMI1-NEXT:    jne .LBB51_2
 ; X86-BMI1-NEXT:  # %bb.1:
 ; X86-BMI1-NEXT:    movl %eax, %edi
-; X86-BMI1-NEXT:  .LBB50_2:
+; X86-BMI1-NEXT:  .LBB51_2:
 ; X86-BMI1-NEXT:    movl %edi, %eax
 ; X86-BMI1-NEXT:    shrl %cl, %eax
 ; X86-BMI1-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1-NEXT:    testb $32, %cl
 ; X86-BMI1-NEXT:    movl $0, %edx
-; X86-BMI1-NEXT:    jne .LBB50_4
+; X86-BMI1-NEXT:    jne .LBB51_4
 ; X86-BMI1-NEXT:  # %bb.3:
 ; X86-BMI1-NEXT:    movl %esi, %ebx
 ; X86-BMI1-NEXT:    movl %eax, %edx
-; X86-BMI1-NEXT:  .LBB50_4:
+; X86-BMI1-NEXT:  .LBB51_4:
 ; X86-BMI1-NEXT:    shrdl %cl, %edi, %ebx
 ; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    jne .LBB50_6
+; X86-BMI1-NEXT:    jne .LBB51_6
 ; X86-BMI1-NEXT:  # %bb.5:
 ; X86-BMI1-NEXT:    movl %ebx, %eax
-; X86-BMI1-NEXT:  .LBB50_6:
+; X86-BMI1-NEXT:  .LBB51_6:
 ; X86-BMI1-NEXT:    popl %esi
 ; X86-BMI1-NEXT:    popl %edi
 ; X86-BMI1-NEXT:    popl %ebx
@@ -3947,22 +4035,22 @@ define i64 @bzhi64_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
 ; X86-BMI2-NEXT:    shlxl %ecx, %edx, %edi
 ; X86-BMI2-NEXT:    xorl %edx, %edx
 ; X86-BMI2-NEXT:    testb $32, %cl
-; X86-BMI2-NEXT:    je .LBB50_2
+; X86-BMI2-NEXT:    je .LBB51_2
 ; X86-BMI2-NEXT:  # %bb.1:
 ; X86-BMI2-NEXT:    movl %edi, %esi
 ; X86-BMI2-NEXT:    movl $0, %edi
-; X86-BMI2-NEXT:  .LBB50_2:
+; X86-BMI2-NEXT:  .LBB51_2:
 ; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI2-NEXT:    jne .LBB50_4
+; X86-BMI2-NEXT:    jne .LBB51_4
 ; X86-BMI2-NEXT:  # %bb.3:
 ; X86-BMI2-NEXT:    movl %eax, %edx
-; X86-BMI2-NEXT:  .LBB50_4:
+; X86-BMI2-NEXT:  .LBB51_4:
 ; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
 ; X86-BMI2-NEXT:    testb $32, %cl
-; X86-BMI2-NEXT:    jne .LBB50_6
+; X86-BMI2-NEXT:    jne .LBB51_6
 ; X86-BMI2-NEXT:  # %bb.5:
 ; X86-BMI2-NEXT:    movl %edi, %eax
-; X86-BMI2-NEXT:  .LBB50_6:
+; X86-BMI2-NEXT:  .LBB51_6:
 ; X86-BMI2-NEXT:    popl %esi
 ; X86-BMI2-NEXT:    popl %edi
 ; X86-BMI2-NEXT:    retl
@@ -4012,18 +4100,18 @@ define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shll %cl, %edx
 ; X86-NOBMI-NEXT:    shldl %cl, %esi, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    je .LBB51_2
+; X86-NOBMI-NEXT:    je .LBB52_2
 ; X86-NOBMI-NEXT:  # %bb.1:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
 ; X86-NOBMI-NEXT:    xorl %edx, %edx
-; X86-NOBMI-NEXT:  .LBB51_2:
+; X86-NOBMI-NEXT:  .LBB52_2:
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %edx
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
-; X86-NOBMI-NEXT:    jne .LBB51_4
+; X86-NOBMI-NEXT:    jne .LBB52_4
 ; X86-NOBMI-NEXT:  # %bb.3:
 ; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:  .LBB51_4:
+; X86-NOBMI-NEXT:  .LBB52_4:
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
@@ -4038,18 +4126,18 @@ define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI1-NEXT:    shll %cl, %edx
 ; X86-BMI1-NEXT:    shldl %cl, %esi, %eax
 ; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    je .LBB51_2
+; X86-BMI1-NEXT:    je .LBB52_2
 ; X86-BMI1-NEXT:  # %bb.1:
 ; X86-BMI1-NEXT:    movl %edx, %eax
 ; X86-BMI1-NEXT:    xorl %edx, %edx
-; X86-BMI1-NEXT:  .LBB51_2:
+; X86-BMI1-NEXT:  .LBB52_2:
 ; X86-BMI1-NEXT:    shrdl %cl, %eax, %edx
 ; X86-BMI1-NEXT:    shrl %cl, %eax
 ; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    jne .LBB51_4
+; X86-BMI1-NEXT:    jne .LBB52_4
 ; X86-BMI1-NEXT:  # %bb.3:
 ; X86-BMI1-NEXT:    movl %edx, %eax
-; X86-BMI1-NEXT:  .LBB51_4:
+; X86-BMI1-NEXT:  .LBB52_4:
 ; X86-BMI1-NEXT:    popl %esi
 ; X86-BMI1-NEXT:    retl
 ;
@@ -4062,17 +4150,17 @@ define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
 ; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
 ; X86-BMI2-NEXT:    testb $32, %cl
-; X86-BMI2-NEXT:    je .LBB51_2
+; X86-BMI2-NEXT:    je .LBB52_2
 ; X86-BMI2-NEXT:  # %bb.1:
 ; X86-BMI2-NEXT:    movl %eax, %edx
 ; X86-BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI2-NEXT:  .LBB51_2:
+; X86-BMI2-NEXT:  .LBB52_2:
 ; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
 ; X86-BMI2-NEXT:    testb $32, %cl
-; X86-BMI2-NEXT:    je .LBB51_4
+; X86-BMI2-NEXT:    je .LBB52_4
 ; X86-BMI2-NEXT:  # %bb.3:
 ; X86-BMI2-NEXT:    shrxl %ecx, %edx, %eax
-; X86-BMI2-NEXT:  .LBB51_4:
+; X86-BMI2-NEXT:  .LBB52_4:
 ; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_32_d0:


        


More information about the llvm-commits mailing list