[llvm-branch-commits] [llvm] e5039aa - [X86] Regenerate bit extraction tests, cleaning up check-prefixes.

Simon Pilgrim via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Dec 16 06:52:49 PST 2020


Author: Simon Pilgrim
Date: 2020-12-16T14:48:21Z
New Revision: e5039aad45740a3017d774673867eccbbe6b0ce0

URL: https://github.com/llvm/llvm-project/commit/e5039aad45740a3017d774673867eccbbe6b0ce0
DIFF: https://github.com/llvm/llvm-project/commit/e5039aad45740a3017d774673867eccbbe6b0ce0.diff

LOG: [X86] Regenerate bit extraction tests, cleaning up check-prefixes.

As noticed on D92965, we needed to simplify the prefixes to ensure all RUNs were properly covered.

We should never have a target with BMI2 without BMI1, so use that as the 'BMI level' and then check with/without TBM (all TBM targets have at least BMI1).

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/extract-bits.ll
    llvm/test/CodeGen/X86/extract-lowbits.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/extract-bits.ll b/llvm/test/CodeGen/X86/extract-bits.ll
index c128a00fc91a..408307439c9d 100644
--- a/llvm/test/CodeGen/X86/extract-bits.ll
+++ b/llvm/test/CodeGen/X86/extract-bits.ll
@@ -1,14 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI,X86-NOBMI
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1NOTBM,X86-BMI1NOTBM
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1TBM,X86-BMI1TBM
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1TBM,X86-BMI1TBM,BMI1TBMBMI2,X86-BMI1TBMBMI2
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1NOTBMBMI2,X86-BMI1NOTBMBMI2
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI,X64-NOBMI
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1NOTBM,X64-BMI1NOTBM
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1TBM,X64-BMI1TBM
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1TBM,X64-BMI1TBM,BMI1TBMBMI2,X64-BMI1TBMBMI2
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1NOTBMBMI2,X64-BMI1NOTBMBMI2
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMINOTBM,X86-BMI1
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMITBM,X86-BMI1
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMITBM,X86-BMI2
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86,X86-BMINOTBM,X86-BMI2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMINOTBM,X64-BMI1
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMITBM,X64-BMI1
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMITBM,X64-BMI2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64,X64-BMINOTBM,X64-BMI2
 
 ; *Please* keep in sync with test/CodeGen/AArch64/extract-bits.ll
 
@@ -46,22 +46,22 @@ define i32 @bextr32_a0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr32_a0:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    orl %eax, %ecx
-; X86-BMI1NOTBM-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr32_a0:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr32_a0:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    orl %eax, %ecx
+; X86-BMI1-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr32_a0:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_a0:
 ; X64-NOBMI:       # %bb.0:
@@ -75,19 +75,19 @@ define i32 @bextr32_a0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr32_a0:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrl %eax, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr32_a0:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr32_a0:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrl %eax, %edi, %eax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr32_a0:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i32 %val, %numskipbits
   %onebit = shl i32 1, %numlowbits
   %mask = add nsw i32 %onebit, -1
@@ -111,23 +111,23 @@ define i32 @bextr32_a0_arithmetic(i32 %val, i32 %numskipbits, i32 %numlowbits) n
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr32_a0_arithmetic:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    sarl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, %edx, %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr32_a0_arithmetic:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    sarxl %ecx, {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr32_a0_arithmetic:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    sarl %cl, %edx
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    bextrl %eax, %edx, %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr32_a0_arithmetic:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    sarxl %ecx, {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_a0_arithmetic:
 ; X64-NOBMI:       # %bb.0:
@@ -141,20 +141,20 @@ define i32 @bextr32_a0_arithmetic(i32 %val, i32 %numskipbits, i32 %numlowbits) n
 ; X64-NOBMI-NEXT:    andl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr32_a0_arithmetic:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-BMI1NOTBM-NEXT:    sarl %cl, %edi
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    bextrl %edx, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr32_a0_arithmetic:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    sarxl %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr32_a0_arithmetic:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    sarl %cl, %edi
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    bextrl %edx, %edi, %eax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr32_a0_arithmetic:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    sarxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %shifted = ashr i32 %val, %numskipbits
   %onebit = shl i32 1, %numlowbits
   %mask = add nsw i32 %onebit, -1
@@ -178,22 +178,22 @@ define i32 @bextr32_a1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext %
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr32_a1_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    orl %eax, %ecx
-; X86-BMI1NOTBM-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr32_a1_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr32_a1_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    orl %eax, %ecx
+; X86-BMI1-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr32_a1_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_a1_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -207,19 +207,19 @@ define i32 @bextr32_a1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext %
 ; X64-NOBMI-NEXT:    andl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr32_a1_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrl %eax, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr32_a1_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr32_a1_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrl %eax, %edi, %eax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr32_a1_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %skip = zext i8 %numskipbits to i32
   %shifted = lshr i32 %val, %skip
   %conv = zext i8 %numlowbits to i32
@@ -246,24 +246,24 @@ define i32 @bextr32_a2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr32_a2_load:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    shll $8, %ecx
-; X86-BMI1NOTBM-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    orl %ecx, %edx
-; X86-BMI1NOTBM-NEXT:    bextrl %edx, (%eax), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr32_a2_load:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
-; X86-BMI1BMI2-NEXT:    shrxl %edx, (%ecx), %ecx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr32_a2_load:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    shll $8, %ecx
+; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    orl %ecx, %edx
+; X86-BMI1-NEXT:    bextrl %edx, (%eax), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr32_a2_load:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT:    shrxl %edx, (%ecx), %ecx
+; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_a2_load:
 ; X64-NOBMI:       # %bb.0:
@@ -278,19 +278,19 @@ define i32 @bextr32_a2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind
 ; X64-NOBMI-NEXT:    andl %esi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr32_a2_load:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrl %eax, (%rdi), %eax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr32_a2_load:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxl %esi, (%rdi), %eax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr32_a2_load:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrl %eax, (%rdi), %eax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr32_a2_load:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxl %esi, (%rdi), %eax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %shifted = lshr i32 %val, %numskipbits
   %onebit = shl i32 1, %numlowbits
@@ -316,24 +316,24 @@ define i32 @bextr32_a3_load_indexzext(i32* %w, i8 zeroext %numskipbits, i8 zeroe
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr32_a3_load_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    shll $8, %ecx
-; X86-BMI1NOTBM-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    orl %ecx, %edx
-; X86-BMI1NOTBM-NEXT:    bextrl %edx, (%eax), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr32_a3_load_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
-; X86-BMI1BMI2-NEXT:    shrxl %edx, (%ecx), %ecx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr32_a3_load_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    shll $8, %ecx
+; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    orl %ecx, %edx
+; X86-BMI1-NEXT:    bextrl %edx, (%eax), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr32_a3_load_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT:    shrxl %edx, (%ecx), %ecx
+; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_a3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -348,19 +348,19 @@ define i32 @bextr32_a3_load_indexzext(i32* %w, i8 zeroext %numskipbits, i8 zeroe
 ; X64-NOBMI-NEXT:    andl %esi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr32_a3_load_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrl %eax, (%rdi), %eax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr32_a3_load_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxl %esi, (%rdi), %eax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr32_a3_load_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrl %eax, (%rdi), %eax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr32_a3_load_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxl %esi, (%rdi), %eax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %skip = zext i8 %numskipbits to i32
   %shifted = lshr i32 %val, %skip
@@ -387,22 +387,22 @@ define i32 @bextr32_a4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits)
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr32_a4_commutative:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    orl %eax, %ecx
-; X86-BMI1NOTBM-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr32_a4_commutative:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr32_a4_commutative:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    orl %eax, %ecx
+; X86-BMI1-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr32_a4_commutative:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_a4_commutative:
 ; X64-NOBMI:       # %bb.0:
@@ -416,19 +416,19 @@ define i32 @bextr32_a4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits)
 ; X64-NOBMI-NEXT:    andl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr32_a4_commutative:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrl %eax, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr32_a4_commutative:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr32_a4_commutative:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrl %eax, %edi, %eax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr32_a4_commutative:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i32 %val, %numskipbits
   %onebit = shl i32 1, %numlowbits
   %mask = add nsw i32 %onebit, -1
@@ -460,37 +460,37 @@ define i32 @bextr32_a5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr32_a5_skipextrauses:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    shll $8, %ecx
-; X86-BMI1NOTBM-NEXT:    movzbl %al, %edx
-; X86-BMI1NOTBM-NEXT:    orl %ecx, %edx
-; X86-BMI1NOTBM-NEXT:    bextrl %edx, {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl %eax, (%esp)
-; X86-BMI1NOTBM-NEXT:    calll use32@PLT
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    addl $8, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr32_a5_skipextrauses:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %edx, %esi
-; X86-BMI1BMI2-NEXT:    movl %ecx, (%esp)
-; X86-BMI1BMI2-NEXT:    calll use32@PLT
-; X86-BMI1BMI2-NEXT:    movl %esi, %eax
-; X86-BMI1BMI2-NEXT:    addl $8, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr32_a5_skipextrauses:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    subl $8, %esp
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    shll $8, %ecx
+; X86-BMI1-NEXT:    movzbl %al, %edx
+; X86-BMI1-NEXT:    orl %ecx, %edx
+; X86-BMI1-NEXT:    bextrl %edx, {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl %eax, (%esp)
+; X86-BMI1-NEXT:    calll use32@PLT
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    addl $8, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr32_a5_skipextrauses:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    subl $8, %esp
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    bzhil %eax, %edx, %esi
+; X86-BMI2-NEXT:    movl %ecx, (%esp)
+; X86-BMI2-NEXT:    calll use32@PLT
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    addl $8, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_a5_skipextrauses:
 ; X64-NOBMI:       # %bb.0:
@@ -508,29 +508,29 @@ define i32 @bextr32_a5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits
 ; X64-NOBMI-NEXT:    popq %rbx
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr32_a5_skipextrauses:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrl %eax, %edi, %ebx
-; X64-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X64-BMI1NOTBM-NEXT:    callq use32@PLT
-; X64-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr32_a5_skipextrauses:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %ebx
-; X64-BMI1BMI2-NEXT:    movl %esi, %edi
-; X64-BMI1BMI2-NEXT:    callq use32@PLT
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr32_a5_skipextrauses:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrl %eax, %edi, %ebx
+; X64-BMI1-NEXT:    movl %esi, %edi
+; X64-BMI1-NEXT:    callq use32@PLT
+; X64-BMI1-NEXT:    movl %ebx, %eax
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr32_a5_skipextrauses:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %ebx
+; X64-BMI2-NEXT:    movl %esi, %edi
+; X64-BMI2-NEXT:    callq use32@PLT
+; X64-BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i32 %val, %numskipbits
   %onebit = shl i32 1, %numlowbits
   %mask = add nsw i32 %onebit, -1
@@ -578,78 +578,78 @@ define i64 @bextr64_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_a0:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %ch
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB7_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB7_2:
-; X86-BMI1NOTBM-NEXT:    movl $1, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:    movb %ch, %cl
-; X86-BMI1NOTBM-NEXT:    shldl %cl, %eax, %edx
-; X86-BMI1NOTBM-NEXT:    shll %cl, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %ch
-; X86-BMI1NOTBM-NEXT:    je .LBB7_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB7_4:
-; X86-BMI1NOTBM-NEXT:    addl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    adcl $-1, %edx
-; X86-BMI1NOTBM-NEXT:    andl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    andl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_a0:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB7_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB7_2:
-; X86-BMI1BMI2-NEXT:    movl $1, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:    movl %ebx, %ecx
-; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
-; X86-BMI1BMI2-NEXT:    shlxl %ebx, %eax, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %bl
-; X86-BMI1BMI2-NEXT:    je .LBB7_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB7_4:
-; X86-BMI1BMI2-NEXT:    addl $-1, %eax
-; X86-BMI1BMI2-NEXT:    adcl $-1, %edx
-; X86-BMI1BMI2-NEXT:    andl %esi, %eax
-; X86-BMI1BMI2-NEXT:    andl %edi, %edx
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_a0:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl %eax, %edi
+; X86-BMI1-NEXT:    shrl %cl, %edi
+; X86-BMI1-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB7_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %edi, %esi
+; X86-BMI1-NEXT:    xorl %edi, %edi
+; X86-BMI1-NEXT:  .LBB7_2:
+; X86-BMI1-NEXT:    movl $1, %eax
+; X86-BMI1-NEXT:    xorl %edx, %edx
+; X86-BMI1-NEXT:    movb %ch, %cl
+; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %ch
+; X86-BMI1-NEXT:    je .LBB7_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:  .LBB7_4:
+; X86-BMI1-NEXT:    addl $-1, %eax
+; X86-BMI1-NEXT:    adcl $-1, %edx
+; X86-BMI1-NEXT:    andl %esi, %eax
+; X86-BMI1-NEXT:    andl %edi, %edx
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_a0:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI2-NEXT:    shrxl %ecx, %eax, %edi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB7_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edi, %esi
+; X86-BMI2-NEXT:    xorl %edi, %edi
+; X86-BMI2-NEXT:  .LBB7_2:
+; X86-BMI2-NEXT:    movl $1, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    movl %ebx, %ecx
+; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI2-NEXT:    shlxl %ebx, %eax, %eax
+; X86-BMI2-NEXT:    testb $32, %bl
+; X86-BMI2-NEXT:    je .LBB7_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB7_4:
+; X86-BMI2-NEXT:    addl $-1, %eax
+; X86-BMI2-NEXT:    adcl $-1, %edx
+; X86-BMI2-NEXT:    andl %esi, %eax
+; X86-BMI2-NEXT:    andl %edi, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_a0:
 ; X64-NOBMI:       # %bb.0:
@@ -663,19 +663,19 @@ define i64 @bextr64_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andq %rdi, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_a0:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_a0:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_a0:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, %rdi, %rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_a0:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhiq %rdx, %rax, %rax
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %onebit = shl i64 1, %numlowbits
   %mask = add nsw i64 %onebit, -1
@@ -721,80 +721,80 @@ define i64 @bextr64_a0_arithmetic(i64 %val, i64 %numskipbits, i64 %numlowbits) n
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_a0_arithmetic:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %ch
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl %eax, %esi
-; X86-BMI1NOTBM-NEXT:    sarl %cl, %esi
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %edi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB8_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    sarl $31, %eax
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:    movl %eax, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB8_2:
-; X86-BMI1NOTBM-NEXT:    movl $1, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:    movb %ch, %cl
-; X86-BMI1NOTBM-NEXT:    shldl %cl, %eax, %edx
-; X86-BMI1NOTBM-NEXT:    shll %cl, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %ch
-; X86-BMI1NOTBM-NEXT:    je .LBB8_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB8_4:
-; X86-BMI1NOTBM-NEXT:    addl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    adcl $-1, %edx
-; X86-BMI1NOTBM-NEXT:    andl %edi, %eax
-; X86-BMI1NOTBM-NEXT:    andl %esi, %edx
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_a0_arithmetic:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI1BMI2-NEXT:    sarxl %ecx, %eax, %edi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB8_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    sarl $31, %eax
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:    movl %eax, %edi
-; X86-BMI1BMI2-NEXT:  .LBB8_2:
-; X86-BMI1BMI2-NEXT:    movl $1, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:    movl %ebx, %ecx
-; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
-; X86-BMI1BMI2-NEXT:    shlxl %ebx, %eax, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %bl
-; X86-BMI1BMI2-NEXT:    je .LBB8_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB8_4:
-; X86-BMI1BMI2-NEXT:    addl $-1, %eax
-; X86-BMI1BMI2-NEXT:    adcl $-1, %edx
-; X86-BMI1BMI2-NEXT:    andl %esi, %eax
-; X86-BMI1BMI2-NEXT:    andl %edi, %edx
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_a0_arithmetic:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl %eax, %esi
+; X86-BMI1-NEXT:    sarl %cl, %esi
+; X86-BMI1-NEXT:    shrdl %cl, %eax, %edi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB8_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    sarl $31, %eax
+; X86-BMI1-NEXT:    movl %esi, %edi
+; X86-BMI1-NEXT:    movl %eax, %esi
+; X86-BMI1-NEXT:  .LBB8_2:
+; X86-BMI1-NEXT:    movl $1, %eax
+; X86-BMI1-NEXT:    xorl %edx, %edx
+; X86-BMI1-NEXT:    movb %ch, %cl
+; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %ch
+; X86-BMI1-NEXT:    je .LBB8_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:  .LBB8_4:
+; X86-BMI1-NEXT:    addl $-1, %eax
+; X86-BMI1-NEXT:    adcl $-1, %edx
+; X86-BMI1-NEXT:    andl %edi, %eax
+; X86-BMI1-NEXT:    andl %esi, %edx
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_a0_arithmetic:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI2-NEXT:    sarxl %ecx, %eax, %edi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB8_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    sarl $31, %eax
+; X86-BMI2-NEXT:    movl %edi, %esi
+; X86-BMI2-NEXT:    movl %eax, %edi
+; X86-BMI2-NEXT:  .LBB8_2:
+; X86-BMI2-NEXT:    movl $1, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    movl %ebx, %ecx
+; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI2-NEXT:    shlxl %ebx, %eax, %eax
+; X86-BMI2-NEXT:    testb $32, %bl
+; X86-BMI2-NEXT:    je .LBB8_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB8_4:
+; X86-BMI2-NEXT:    addl $-1, %eax
+; X86-BMI2-NEXT:    adcl $-1, %edx
+; X86-BMI2-NEXT:    andl %esi, %eax
+; X86-BMI2-NEXT:    andl %edi, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_a0_arithmetic:
 ; X64-NOBMI:       # %bb.0:
@@ -808,20 +808,20 @@ define i64 @bextr64_a0_arithmetic(i64 %val, i64 %numskipbits, i64 %numlowbits) n
 ; X64-NOBMI-NEXT:    andq %rdi, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_a0_arithmetic:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-BMI1NOTBM-NEXT:    sarq %cl, %rdi
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    bextrq %rdx, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_a0_arithmetic:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    sarxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_a0_arithmetic:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movq %rsi, %rcx
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1-NEXT:    sarq %cl, %rdi
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    bextrq %rdx, %rdi, %rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_a0_arithmetic:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    sarxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhiq %rdx, %rax, %rax
+; X64-BMI2-NEXT:    retq
   %shifted = ashr i64 %val, %numskipbits
   %onebit = shl i64 1, %numlowbits
   %mask = add nsw i64 %onebit, -1
@@ -866,78 +866,78 @@ define i64 @bextr64_a1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_a1_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %ch
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB9_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB9_2:
-; X86-BMI1NOTBM-NEXT:    movl $1, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:    movb %ch, %cl
-; X86-BMI1NOTBM-NEXT:    shldl %cl, %eax, %edx
-; X86-BMI1NOTBM-NEXT:    shll %cl, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %ch
-; X86-BMI1NOTBM-NEXT:    je .LBB9_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB9_4:
-; X86-BMI1NOTBM-NEXT:    addl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    adcl $-1, %edx
-; X86-BMI1NOTBM-NEXT:    andl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    andl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_a1_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB9_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB9_2:
-; X86-BMI1BMI2-NEXT:    movl $1, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:    movl %ebx, %ecx
-; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
-; X86-BMI1BMI2-NEXT:    shlxl %ebx, %eax, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %bl
-; X86-BMI1BMI2-NEXT:    je .LBB9_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB9_4:
-; X86-BMI1BMI2-NEXT:    addl $-1, %eax
-; X86-BMI1BMI2-NEXT:    adcl $-1, %edx
-; X86-BMI1BMI2-NEXT:    andl %esi, %eax
-; X86-BMI1BMI2-NEXT:    andl %edi, %edx
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_a1_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl %eax, %edi
+; X86-BMI1-NEXT:    shrl %cl, %edi
+; X86-BMI1-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB9_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %edi, %esi
+; X86-BMI1-NEXT:    xorl %edi, %edi
+; X86-BMI1-NEXT:  .LBB9_2:
+; X86-BMI1-NEXT:    movl $1, %eax
+; X86-BMI1-NEXT:    xorl %edx, %edx
+; X86-BMI1-NEXT:    movb %ch, %cl
+; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %ch
+; X86-BMI1-NEXT:    je .LBB9_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:  .LBB9_4:
+; X86-BMI1-NEXT:    addl $-1, %eax
+; X86-BMI1-NEXT:    adcl $-1, %edx
+; X86-BMI1-NEXT:    andl %esi, %eax
+; X86-BMI1-NEXT:    andl %edi, %edx
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_a1_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI2-NEXT:    shrxl %ecx, %eax, %edi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB9_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edi, %esi
+; X86-BMI2-NEXT:    xorl %edi, %edi
+; X86-BMI2-NEXT:  .LBB9_2:
+; X86-BMI2-NEXT:    movl $1, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    movl %ebx, %ecx
+; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI2-NEXT:    shlxl %ebx, %eax, %eax
+; X86-BMI2-NEXT:    testb $32, %bl
+; X86-BMI2-NEXT:    je .LBB9_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB9_4:
+; X86-BMI2-NEXT:    addl $-1, %eax
+; X86-BMI2-NEXT:    adcl $-1, %edx
+; X86-BMI2-NEXT:    andl %esi, %eax
+; X86-BMI2-NEXT:    andl %edi, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_a1_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -951,21 +951,21 @@ define i64 @bextr64_a1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %
 ; X64-NOBMI-NEXT:    andq %rdi, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_a1_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_a1_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    # kill: def $edx killed $edx def $rdx
-; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_a1_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, %rdi, %rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_a1_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhiq %rdx, %rax, %rax
+; X64-BMI2-NEXT:    retq
   %skip = zext i8 %numskipbits to i64
   %shifted = lshr i64 %val, %skip
   %conv = zext i8 %numlowbits to i64
@@ -1013,80 +1013,80 @@ define i64 @bextr64_a2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_a2_load:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %ch
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl (%eax), %esi
-; X86-BMI1NOTBM-NEXT:    movl 4(%eax), %eax
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB10_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB10_2:
-; X86-BMI1NOTBM-NEXT:    movl $1, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:    movb %ch, %cl
-; X86-BMI1NOTBM-NEXT:    shldl %cl, %eax, %edx
-; X86-BMI1NOTBM-NEXT:    shll %cl, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %ch
-; X86-BMI1NOTBM-NEXT:    je .LBB10_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB10_4:
-; X86-BMI1NOTBM-NEXT:    addl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    adcl $-1, %edx
-; X86-BMI1NOTBM-NEXT:    andl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    andl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_a2_load:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movl (%eax), %esi
-; X86-BMI1BMI2-NEXT:    movl 4(%eax), %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edi
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB10_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB10_2:
-; X86-BMI1BMI2-NEXT:    movl $1, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:    movl %ebx, %ecx
-; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
-; X86-BMI1BMI2-NEXT:    shlxl %ebx, %eax, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %bl
-; X86-BMI1BMI2-NEXT:    je .LBB10_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB10_4:
-; X86-BMI1BMI2-NEXT:    addl $-1, %eax
-; X86-BMI1BMI2-NEXT:    adcl $-1, %edx
-; X86-BMI1BMI2-NEXT:    andl %esi, %eax
-; X86-BMI1BMI2-NEXT:    andl %edi, %edx
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_a2_load:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl (%eax), %esi
+; X86-BMI1-NEXT:    movl 4(%eax), %eax
+; X86-BMI1-NEXT:    movl %eax, %edi
+; X86-BMI1-NEXT:    shrl %cl, %edi
+; X86-BMI1-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB10_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %edi, %esi
+; X86-BMI1-NEXT:    xorl %edi, %edi
+; X86-BMI1-NEXT:  .LBB10_2:
+; X86-BMI1-NEXT:    movl $1, %eax
+; X86-BMI1-NEXT:    xorl %edx, %edx
+; X86-BMI1-NEXT:    movb %ch, %cl
+; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %ch
+; X86-BMI1-NEXT:    je .LBB10_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:  .LBB10_4:
+; X86-BMI1-NEXT:    addl $-1, %eax
+; X86-BMI1-NEXT:    adcl $-1, %edx
+; X86-BMI1-NEXT:    andl %esi, %eax
+; X86-BMI1-NEXT:    andl %edi, %edx
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_a2_load:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl (%eax), %esi
+; X86-BMI2-NEXT:    movl 4(%eax), %eax
+; X86-BMI2-NEXT:    shrxl %ecx, %eax, %edi
+; X86-BMI2-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB10_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edi, %esi
+; X86-BMI2-NEXT:    xorl %edi, %edi
+; X86-BMI2-NEXT:  .LBB10_2:
+; X86-BMI2-NEXT:    movl $1, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    movl %ebx, %ecx
+; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI2-NEXT:    shlxl %ebx, %eax, %eax
+; X86-BMI2-NEXT:    testb $32, %bl
+; X86-BMI2-NEXT:    je .LBB10_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB10_4:
+; X86-BMI2-NEXT:    addl $-1, %eax
+; X86-BMI2-NEXT:    adcl $-1, %edx
+; X86-BMI2-NEXT:    andl %esi, %eax
+; X86-BMI2-NEXT:    andl %edi, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_a2_load:
 ; X64-NOBMI:       # %bb.0:
@@ -1101,19 +1101,19 @@ define i64 @bextr64_a2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X64-NOBMI-NEXT:    andq %rsi, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_a2_load:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, (%rdi), %rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_a2_load:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, (%rdi), %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_a2_load:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, (%rdi), %rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_a2_load:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, (%rdi), %rax
+; X64-BMI2-NEXT:    bzhiq %rdx, %rax, %rax
+; X64-BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %shifted = lshr i64 %val, %numskipbits
   %onebit = shl i64 1, %numlowbits
@@ -1160,80 +1160,80 @@ define i64 @bextr64_a3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroe
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_a3_load_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %ch
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl (%eax), %esi
-; X86-BMI1NOTBM-NEXT:    movl 4(%eax), %eax
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB11_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB11_2:
-; X86-BMI1NOTBM-NEXT:    movl $1, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:    movb %ch, %cl
-; X86-BMI1NOTBM-NEXT:    shldl %cl, %eax, %edx
-; X86-BMI1NOTBM-NEXT:    shll %cl, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %ch
-; X86-BMI1NOTBM-NEXT:    je .LBB11_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB11_4:
-; X86-BMI1NOTBM-NEXT:    addl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    adcl $-1, %edx
-; X86-BMI1NOTBM-NEXT:    andl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    andl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_a3_load_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movl (%eax), %esi
-; X86-BMI1BMI2-NEXT:    movl 4(%eax), %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edi
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB11_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB11_2:
-; X86-BMI1BMI2-NEXT:    movl $1, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:    movl %ebx, %ecx
-; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
-; X86-BMI1BMI2-NEXT:    shlxl %ebx, %eax, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %bl
-; X86-BMI1BMI2-NEXT:    je .LBB11_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB11_4:
-; X86-BMI1BMI2-NEXT:    addl $-1, %eax
-; X86-BMI1BMI2-NEXT:    adcl $-1, %edx
-; X86-BMI1BMI2-NEXT:    andl %esi, %eax
-; X86-BMI1BMI2-NEXT:    andl %edi, %edx
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_a3_load_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl (%eax), %esi
+; X86-BMI1-NEXT:    movl 4(%eax), %eax
+; X86-BMI1-NEXT:    movl %eax, %edi
+; X86-BMI1-NEXT:    shrl %cl, %edi
+; X86-BMI1-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB11_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %edi, %esi
+; X86-BMI1-NEXT:    xorl %edi, %edi
+; X86-BMI1-NEXT:  .LBB11_2:
+; X86-BMI1-NEXT:    movl $1, %eax
+; X86-BMI1-NEXT:    xorl %edx, %edx
+; X86-BMI1-NEXT:    movb %ch, %cl
+; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %ch
+; X86-BMI1-NEXT:    je .LBB11_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:  .LBB11_4:
+; X86-BMI1-NEXT:    addl $-1, %eax
+; X86-BMI1-NEXT:    adcl $-1, %edx
+; X86-BMI1-NEXT:    andl %esi, %eax
+; X86-BMI1-NEXT:    andl %edi, %edx
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_a3_load_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl (%eax), %esi
+; X86-BMI2-NEXT:    movl 4(%eax), %eax
+; X86-BMI2-NEXT:    shrxl %ecx, %eax, %edi
+; X86-BMI2-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB11_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edi, %esi
+; X86-BMI2-NEXT:    xorl %edi, %edi
+; X86-BMI2-NEXT:  .LBB11_2:
+; X86-BMI2-NEXT:    movl $1, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    movl %ebx, %ecx
+; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI2-NEXT:    shlxl %ebx, %eax, %eax
+; X86-BMI2-NEXT:    testb $32, %bl
+; X86-BMI2-NEXT:    je .LBB11_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB11_4:
+; X86-BMI2-NEXT:    addl $-1, %eax
+; X86-BMI2-NEXT:    adcl $-1, %edx
+; X86-BMI2-NEXT:    andl %esi, %eax
+; X86-BMI2-NEXT:    andl %edi, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_a3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -1248,21 +1248,21 @@ define i64 @bextr64_a3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroe
 ; X64-NOBMI-NEXT:    andq %rsi, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_a3_load_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, (%rdi), %rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_a3_load_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    # kill: def $edx killed $edx def $rdx
-; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, (%rdi), %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_a3_load_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, (%rdi), %rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_a3_load_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI2-NEXT:    shrxq %rsi, (%rdi), %rax
+; X64-BMI2-NEXT:    bzhiq %rdx, %rax, %rax
+; X64-BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %skip = zext i8 %numskipbits to i64
   %shifted = lshr i64 %val, %skip
@@ -1310,78 +1310,78 @@ define i64 @bextr64_a4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_a4_commutative:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %ch
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB12_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB12_2:
-; X86-BMI1NOTBM-NEXT:    movl $1, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:    movb %ch, %cl
-; X86-BMI1NOTBM-NEXT:    shldl %cl, %esi, %edi
-; X86-BMI1NOTBM-NEXT:    shll %cl, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %ch
-; X86-BMI1NOTBM-NEXT:    je .LBB12_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB12_4:
-; X86-BMI1NOTBM-NEXT:    addl $-1, %esi
-; X86-BMI1NOTBM-NEXT:    adcl $-1, %edi
-; X86-BMI1NOTBM-NEXT:    andl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    andl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_a4_commutative:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB12_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edx, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:  .LBB12_2:
-; X86-BMI1BMI2-NEXT:    movl $1, %edi
-; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:    movl %ebx, %ecx
-; X86-BMI1BMI2-NEXT:    shldl %cl, %edi, %esi
-; X86-BMI1BMI2-NEXT:    shlxl %ebx, %edi, %ecx
-; X86-BMI1BMI2-NEXT:    testb $32, %bl
-; X86-BMI1BMI2-NEXT:    je .LBB12_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %ecx, %esi
-; X86-BMI1BMI2-NEXT:    xorl %ecx, %ecx
-; X86-BMI1BMI2-NEXT:  .LBB12_4:
-; X86-BMI1BMI2-NEXT:    addl $-1, %ecx
-; X86-BMI1BMI2-NEXT:    adcl $-1, %esi
-; X86-BMI1BMI2-NEXT:    andl %ecx, %eax
-; X86-BMI1BMI2-NEXT:    andl %esi, %edx
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_a4_commutative:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl %esi, %edx
+; X86-BMI1-NEXT:    shrl %cl, %edx
+; X86-BMI1-NEXT:    shrdl %cl, %esi, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB12_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %edx, %eax
+; X86-BMI1-NEXT:    xorl %edx, %edx
+; X86-BMI1-NEXT:  .LBB12_2:
+; X86-BMI1-NEXT:    movl $1, %esi
+; X86-BMI1-NEXT:    xorl %edi, %edi
+; X86-BMI1-NEXT:    movb %ch, %cl
+; X86-BMI1-NEXT:    shldl %cl, %esi, %edi
+; X86-BMI1-NEXT:    shll %cl, %esi
+; X86-BMI1-NEXT:    testb $32, %ch
+; X86-BMI1-NEXT:    je .LBB12_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %esi, %edi
+; X86-BMI1-NEXT:    xorl %esi, %esi
+; X86-BMI1-NEXT:  .LBB12_4:
+; X86-BMI1-NEXT:    addl $-1, %esi
+; X86-BMI1-NEXT:    adcl $-1, %edi
+; X86-BMI1-NEXT:    andl %esi, %eax
+; X86-BMI1-NEXT:    andl %edi, %edx
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_a4_commutative:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT:    shrxl %ecx, %edx, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB12_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edx, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:  .LBB12_2:
+; X86-BMI2-NEXT:    movl $1, %edi
+; X86-BMI2-NEXT:    xorl %esi, %esi
+; X86-BMI2-NEXT:    movl %ebx, %ecx
+; X86-BMI2-NEXT:    shldl %cl, %edi, %esi
+; X86-BMI2-NEXT:    shlxl %ebx, %edi, %ecx
+; X86-BMI2-NEXT:    testb $32, %bl
+; X86-BMI2-NEXT:    je .LBB12_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %ecx, %esi
+; X86-BMI2-NEXT:    xorl %ecx, %ecx
+; X86-BMI2-NEXT:  .LBB12_4:
+; X86-BMI2-NEXT:    addl $-1, %ecx
+; X86-BMI2-NEXT:    adcl $-1, %esi
+; X86-BMI2-NEXT:    andl %ecx, %eax
+; X86-BMI2-NEXT:    andl %esi, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_a4_commutative:
 ; X64-NOBMI:       # %bb.0:
@@ -1395,19 +1395,19 @@ define i64 @bextr64_a4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X64-NOBMI-NEXT:    andq %rdi, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_a4_commutative:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_a4_commutative:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_a4_commutative:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, %rdi, %rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_a4_commutative:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhiq %rdx, %rax, %rax
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %onebit = shl i64 1, %numlowbits
   %mask = add nsw i64 %onebit, -1
@@ -1466,104 +1466,104 @@ define i64 @bextr64_a5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-NOBMI-NEXT:    popl %ebp
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_a5_skipextrauses:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebp
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    subl $12, %esp
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %dl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl %esi, %ebp
-; X86-BMI1NOTBM-NEXT:    movl %eax, %ecx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebp
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %ebx
-; X86-BMI1NOTBM-NEXT:    testb $32, %al
-; X86-BMI1NOTBM-NEXT:    je .LBB13_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %ebp, %ebx
-; X86-BMI1NOTBM-NEXT:    xorl %ebp, %ebp
-; X86-BMI1NOTBM-NEXT:  .LBB13_2:
-; X86-BMI1NOTBM-NEXT:    movl $1, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:    movl %edx, %ecx
-; X86-BMI1NOTBM-NEXT:    shldl %cl, %esi, %edi
-; X86-BMI1NOTBM-NEXT:    shll %cl, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %dl
-; X86-BMI1NOTBM-NEXT:    je .LBB13_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB13_4:
-; X86-BMI1NOTBM-NEXT:    addl $-1, %esi
-; X86-BMI1NOTBM-NEXT:    adcl $-1, %edi
-; X86-BMI1NOTBM-NEXT:    andl %ebx, %esi
-; X86-BMI1NOTBM-NEXT:    andl %ebp, %edi
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-BMI1NOTBM-NEXT:    pushl %eax
-; X86-BMI1NOTBM-NEXT:    calll use64@PLT
-; X86-BMI1NOTBM-NEXT:    addl $16, %esp
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    addl $12, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    popl %ebp
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_a5_skipextrauses:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebp
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    subl $12, %esp
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movl %eax, %ecx
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %ebx
-; X86-BMI1BMI2-NEXT:    shrxl %eax, %esi, %ebp
-; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    je .LBB13_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %ebp, %ebx
-; X86-BMI1BMI2-NEXT:    xorl %ebp, %ebp
-; X86-BMI1BMI2-NEXT:  .LBB13_2:
-; X86-BMI1BMI2-NEXT:    movl $1, %edi
-; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:    movl %edx, %ecx
-; X86-BMI1BMI2-NEXT:    shldl %cl, %edi, %esi
-; X86-BMI1BMI2-NEXT:    shlxl %edx, %edi, %edi
-; X86-BMI1BMI2-NEXT:    testb $32, %dl
-; X86-BMI1BMI2-NEXT:    je .LBB13_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB13_4:
-; X86-BMI1BMI2-NEXT:    addl $-1, %edi
-; X86-BMI1BMI2-NEXT:    adcl $-1, %esi
-; X86-BMI1BMI2-NEXT:    andl %ebx, %edi
-; X86-BMI1BMI2-NEXT:    andl %ebp, %esi
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-BMI1BMI2-NEXT:    pushl %eax
-; X86-BMI1BMI2-NEXT:    calll use64@PLT
-; X86-BMI1BMI2-NEXT:    addl $16, %esp
-; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:    movl %esi, %edx
-; X86-BMI1BMI2-NEXT:    addl $12, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    popl %ebp
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_a5_skipextrauses:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebp
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    subl $12, %esp
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl %esi, %ebp
+; X86-BMI1-NEXT:    movl %eax, %ecx
+; X86-BMI1-NEXT:    shrl %cl, %ebp
+; X86-BMI1-NEXT:    shrdl %cl, %esi, %ebx
+; X86-BMI1-NEXT:    testb $32, %al
+; X86-BMI1-NEXT:    je .LBB13_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %ebp, %ebx
+; X86-BMI1-NEXT:    xorl %ebp, %ebp
+; X86-BMI1-NEXT:  .LBB13_2:
+; X86-BMI1-NEXT:    movl $1, %esi
+; X86-BMI1-NEXT:    xorl %edi, %edi
+; X86-BMI1-NEXT:    movl %edx, %ecx
+; X86-BMI1-NEXT:    shldl %cl, %esi, %edi
+; X86-BMI1-NEXT:    shll %cl, %esi
+; X86-BMI1-NEXT:    testb $32, %dl
+; X86-BMI1-NEXT:    je .LBB13_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %esi, %edi
+; X86-BMI1-NEXT:    xorl %esi, %esi
+; X86-BMI1-NEXT:  .LBB13_4:
+; X86-BMI1-NEXT:    addl $-1, %esi
+; X86-BMI1-NEXT:    adcl $-1, %edi
+; X86-BMI1-NEXT:    andl %ebx, %esi
+; X86-BMI1-NEXT:    andl %ebp, %edi
+; X86-BMI1-NEXT:    subl $8, %esp
+; X86-BMI1-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    pushl %eax
+; X86-BMI1-NEXT:    calll use64@PLT
+; X86-BMI1-NEXT:    addl $16, %esp
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    addl $12, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    popl %ebp
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_a5_skipextrauses:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebp
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    subl $12, %esp
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl %eax, %ecx
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %ebx
+; X86-BMI2-NEXT:    shrxl %eax, %esi, %ebp
+; X86-BMI2-NEXT:    testb $32, %al
+; X86-BMI2-NEXT:    je .LBB13_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %ebp, %ebx
+; X86-BMI2-NEXT:    xorl %ebp, %ebp
+; X86-BMI2-NEXT:  .LBB13_2:
+; X86-BMI2-NEXT:    movl $1, %edi
+; X86-BMI2-NEXT:    xorl %esi, %esi
+; X86-BMI2-NEXT:    movl %edx, %ecx
+; X86-BMI2-NEXT:    shldl %cl, %edi, %esi
+; X86-BMI2-NEXT:    shlxl %edx, %edi, %edi
+; X86-BMI2-NEXT:    testb $32, %dl
+; X86-BMI2-NEXT:    je .LBB13_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %edi, %esi
+; X86-BMI2-NEXT:    xorl %edi, %edi
+; X86-BMI2-NEXT:  .LBB13_4:
+; X86-BMI2-NEXT:    addl $-1, %edi
+; X86-BMI2-NEXT:    adcl $-1, %esi
+; X86-BMI2-NEXT:    andl %ebx, %edi
+; X86-BMI2-NEXT:    andl %ebp, %esi
+; X86-BMI2-NEXT:    subl $8, %esp
+; X86-BMI2-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    pushl %eax
+; X86-BMI2-NEXT:    calll use64@PLT
+; X86-BMI2-NEXT:    addl $16, %esp
+; X86-BMI2-NEXT:    movl %edi, %eax
+; X86-BMI2-NEXT:    movl %esi, %edx
+; X86-BMI2-NEXT:    addl $12, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    popl %ebp
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_a5_skipextrauses:
 ; X64-NOBMI:       # %bb.0:
@@ -1581,29 +1581,29 @@ define i64 @bextr64_a5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X64-NOBMI-NEXT:    popq %rbx
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_a5_skipextrauses:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rsi, %rdi
-; X64-BMI1NOTBM-NEXT:    callq use64@PLT
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_a5_skipextrauses:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rbx
-; X64-BMI1BMI2-NEXT:    movq %rsi, %rdi
-; X64-BMI1BMI2-NEXT:    callq use64@PLT
-; X64-BMI1BMI2-NEXT:    movq %rbx, %rax
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_a5_skipextrauses:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, %rdi, %rbx
+; X64-BMI1-NEXT:    movq %rsi, %rdi
+; X64-BMI1-NEXT:    callq use64@PLT
+; X64-BMI1-NEXT:    movq %rbx, %rax
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_a5_skipextrauses:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhiq %rdx, %rax, %rbx
+; X64-BMI2-NEXT:    movq %rsi, %rdi
+; X64-BMI2-NEXT:    callq use64@PLT
+; X64-BMI2-NEXT:    movq %rbx, %rax
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %onebit = shl i64 1, %numlowbits
   %mask = add nsw i64 %onebit, -1
@@ -1647,61 +1647,61 @@ define i32 @bextr64_32_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_32_a0:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %dl
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB14_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB14_2:
-; X86-BMI1NOTBM-NEXT:    movl $1, %edi
-; X86-BMI1NOTBM-NEXT:    movl %edx, %ecx
-; X86-BMI1NOTBM-NEXT:    shll %cl, %edi
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %dl
-; X86-BMI1NOTBM-NEXT:    jne .LBB14_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB14_4:
-; X86-BMI1NOTBM-NEXT:    decl %eax
-; X86-BMI1NOTBM-NEXT:    andl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_32_a0:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB14_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB14_2:
-; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %bl
-; X86-BMI1BMI2-NEXT:    jne .LBB14_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl $1, %eax
-; X86-BMI1BMI2-NEXT:    shlxl %ebx, %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB14_4:
-; X86-BMI1BMI2-NEXT:    decl %eax
-; X86-BMI1BMI2-NEXT:    andl %edx, %eax
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_32_a0:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    movl %edi, %esi
+; X86-BMI1-NEXT:    shrl %cl, %esi
+; X86-BMI1-NEXT:    shrdl %cl, %edi, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB14_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %esi
+; X86-BMI1-NEXT:  .LBB14_2:
+; X86-BMI1-NEXT:    movl $1, %edi
+; X86-BMI1-NEXT:    movl %edx, %ecx
+; X86-BMI1-NEXT:    shll %cl, %edi
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:    testb $32, %dl
+; X86-BMI1-NEXT:    jne .LBB14_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %edi, %eax
+; X86-BMI1-NEXT:  .LBB14_4:
+; X86-BMI1-NEXT:    decl %eax
+; X86-BMI1-NEXT:    andl %esi, %eax
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_32_a0:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    shrdl %cl, %eax, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB14_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    shrxl %ecx, %eax, %edx
+; X86-BMI2-NEXT:  .LBB14_2:
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:    testb $32, %bl
+; X86-BMI2-NEXT:    jne .LBB14_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl $1, %eax
+; X86-BMI2-NEXT:    shlxl %ebx, %eax, %eax
+; X86-BMI2-NEXT:  .LBB14_4:
+; X86-BMI2-NEXT:    decl %eax
+; X86-BMI2-NEXT:    andl %edx, %eax
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_32_a0:
 ; X64-NOBMI:       # %bb.0:
@@ -1716,20 +1716,20 @@ define i32 @bextr64_32_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_32_a0:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_32_a0:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_32_a0:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, %rdi, %rax
+; X64-BMI1-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_32_a0:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %onebit = shl i64 1, %numlowbits
   %mask = add nsw i64 %onebit, -1
@@ -1765,44 +1765,44 @@ define i32 @bextr64_32_a1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_32_a1:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB15_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB15_2:
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, %edx, %eax
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_32_a1:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB15_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edx
-; X86-BMI1BMI2-NEXT:  .LBB15_2:
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %edx, %eax
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_32_a1:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    shrl %cl, %edx
+; X86-BMI1-NEXT:    shrdl %cl, %edi, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB15_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %esi, %edx
+; X86-BMI1-NEXT:  .LBB15_2:
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    bextrl %eax, %edx, %eax
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_32_a1:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB15_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    shrxl %ecx, %esi, %edx
+; X86-BMI2-NEXT:  .LBB15_2:
+; X86-BMI2-NEXT:    bzhil %eax, %edx, %eax
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_32_a1:
 ; X64-NOBMI:       # %bb.0:
@@ -1816,20 +1816,20 @@ define i32 @bextr64_32_a1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X64-NOBMI-NEXT:    andl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_32_a1:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_32_a1:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_32_a1:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, %rdi, %rax
+; X64-BMI1-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_32_a1:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %truncshifted = trunc i64 %shifted to i32
   %onebit = shl i32 1, %numlowbits
@@ -1870,54 +1870,54 @@ define i32 @bextr64_32_a1_trunc_extrause(i64 %val, i64 %numskipbits, i32 %numlow
 ; X86-NOBMI-NEXT:    popl %ebx
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_32_a1_trunc_extrause:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    pushl %eax
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    movl %edx, %esi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB16_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB16_2:
-; X86-BMI1NOTBM-NEXT:    movl %esi, (%esp)
-; X86-BMI1NOTBM-NEXT:    calll use32@PLT
-; X86-BMI1NOTBM-NEXT:    shll $8, %ebx
-; X86-BMI1NOTBM-NEXT:    bextrl %ebx, %esi, %eax
-; X86-BMI1NOTBM-NEXT:    addl $4, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_32_a1_trunc_extrause:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    pushl %eax
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB16_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %esi
-; X86-BMI1BMI2-NEXT:  .LBB16_2:
-; X86-BMI1BMI2-NEXT:    movl %esi, (%esp)
-; X86-BMI1BMI2-NEXT:    calll use32@PLT
-; X86-BMI1BMI2-NEXT:    bzhil %ebx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    addl $4, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_32_a1_trunc_extrause:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    pushl %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    movl %edx, %esi
+; X86-BMI1-NEXT:    shrl %cl, %esi
+; X86-BMI1-NEXT:    shrdl %cl, %edx, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB16_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %esi
+; X86-BMI1-NEXT:  .LBB16_2:
+; X86-BMI1-NEXT:    movl %esi, (%esp)
+; X86-BMI1-NEXT:    calll use32@PLT
+; X86-BMI1-NEXT:    shll $8, %ebx
+; X86-BMI1-NEXT:    bextrl %ebx, %esi, %eax
+; X86-BMI1-NEXT:    addl $4, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_32_a1_trunc_extrause:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    pushl %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB16_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    shrxl %ecx, %eax, %esi
+; X86-BMI2-NEXT:  .LBB16_2:
+; X86-BMI2-NEXT:    movl %esi, (%esp)
+; X86-BMI2-NEXT:    calll use32@PLT
+; X86-BMI2-NEXT:    bzhil %ebx, %esi, %eax
+; X86-BMI2-NEXT:    addl $4, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_32_a1_trunc_extrause:
 ; X64-NOBMI:       # %bb.0:
@@ -1941,39 +1941,39 @@ define i32 @bextr64_32_a1_trunc_extrause(i64 %val, i64 %numskipbits, i32 %numlow
 ; X64-NOBMI-NEXT:    popq %rbp
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_32_a1_trunc_extrause:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %rbp
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    pushq %rax
-; X64-BMI1NOTBM-NEXT:    movl %edx, %ebp
-; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
-; X64-BMI1NOTBM-NEXT:    movq %rdi, %rbx
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
-; X64-BMI1NOTBM-NEXT:    movl %ebx, %edi
-; X64-BMI1NOTBM-NEXT:    callq use32@PLT
-; X64-BMI1NOTBM-NEXT:    shll $8, %ebp
-; X64-BMI1NOTBM-NEXT:    bextrl %ebp, %ebx, %eax
-; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    popq %rbp
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_32_a1_trunc_extrause:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %rbp
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    pushq %rax
-; X64-BMI1BMI2-NEXT:    movl %edx, %ebp
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rbx
-; X64-BMI1BMI2-NEXT:    movl %ebx, %edi
-; X64-BMI1BMI2-NEXT:    callq use32@PLT
-; X64-BMI1BMI2-NEXT:    bzhil %ebp, %ebx, %eax
-; X64-BMI1BMI2-NEXT:    addq $8, %rsp
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    popq %rbp
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_32_a1_trunc_extrause:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %rbp
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    pushq %rax
+; X64-BMI1-NEXT:    movl %edx, %ebp
+; X64-BMI1-NEXT:    movq %rsi, %rcx
+; X64-BMI1-NEXT:    movq %rdi, %rbx
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1-NEXT:    shrq %cl, %rbx
+; X64-BMI1-NEXT:    movl %ebx, %edi
+; X64-BMI1-NEXT:    callq use32@PLT
+; X64-BMI1-NEXT:    shll $8, %ebp
+; X64-BMI1-NEXT:    bextrl %ebp, %ebx, %eax
+; X64-BMI1-NEXT:    addq $8, %rsp
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    popq %rbp
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_32_a1_trunc_extrause:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %rbp
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    pushq %rax
+; X64-BMI2-NEXT:    movl %edx, %ebp
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rbx
+; X64-BMI2-NEXT:    movl %ebx, %edi
+; X64-BMI2-NEXT:    callq use32@PLT
+; X64-BMI2-NEXT:    bzhil %ebp, %ebx, %eax
+; X64-BMI2-NEXT:    addq $8, %rsp
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    popq %rbp
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %truncshifted = trunc i64 %shifted to i32
   call void @use32(i32 %truncshifted)
@@ -2011,44 +2011,44 @@ define i32 @bextr64_32_a2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_32_a2:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB17_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB17_2:
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, %edx, %eax
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_32_a2:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB17_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edx
-; X86-BMI1BMI2-NEXT:  .LBB17_2:
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %edx, %eax
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_32_a2:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    shrl %cl, %edx
+; X86-BMI1-NEXT:    shrdl %cl, %edi, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB17_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %esi, %edx
+; X86-BMI1-NEXT:  .LBB17_2:
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    bextrl %eax, %edx, %eax
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_32_a2:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB17_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    shrxl %ecx, %esi, %edx
+; X86-BMI2-NEXT:  .LBB17_2:
+; X86-BMI2-NEXT:    bzhil %eax, %edx, %eax
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_32_a2:
 ; X64-NOBMI:       # %bb.0:
@@ -2062,20 +2062,20 @@ define i32 @bextr64_32_a2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X64-NOBMI-NEXT:    andl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_32_a2:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_32_a2:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_32_a2:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, %rdi, %rax
+; X64-BMI1-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_32_a2:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %onebit = shl i32 1, %numlowbits
   %mask = add nsw i32 %onebit, -1
@@ -2119,61 +2119,61 @@ define i32 @bextr64_32_a3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_32_a3:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %dl
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB18_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB18_2:
-; X86-BMI1NOTBM-NEXT:    movl $1, %edi
-; X86-BMI1NOTBM-NEXT:    movl %edx, %ecx
-; X86-BMI1NOTBM-NEXT:    shll %cl, %edi
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %dl
-; X86-BMI1NOTBM-NEXT:    jne .LBB18_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB18_4:
-; X86-BMI1NOTBM-NEXT:    decl %eax
-; X86-BMI1NOTBM-NEXT:    andl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_32_a3:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB18_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB18_2:
-; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %bl
-; X86-BMI1BMI2-NEXT:    jne .LBB18_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl $1, %eax
-; X86-BMI1BMI2-NEXT:    shlxl %ebx, %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB18_4:
-; X86-BMI1BMI2-NEXT:    decl %eax
-; X86-BMI1BMI2-NEXT:    andl %edx, %eax
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_32_a3:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    movl %edi, %esi
+; X86-BMI1-NEXT:    shrl %cl, %esi
+; X86-BMI1-NEXT:    shrdl %cl, %edi, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB18_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %esi
+; X86-BMI1-NEXT:  .LBB18_2:
+; X86-BMI1-NEXT:    movl $1, %edi
+; X86-BMI1-NEXT:    movl %edx, %ecx
+; X86-BMI1-NEXT:    shll %cl, %edi
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:    testb $32, %dl
+; X86-BMI1-NEXT:    jne .LBB18_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %edi, %eax
+; X86-BMI1-NEXT:  .LBB18_4:
+; X86-BMI1-NEXT:    decl %eax
+; X86-BMI1-NEXT:    andl %esi, %eax
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_32_a3:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    shrdl %cl, %eax, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB18_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    shrxl %ecx, %eax, %edx
+; X86-BMI2-NEXT:  .LBB18_2:
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:    testb $32, %bl
+; X86-BMI2-NEXT:    jne .LBB18_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl $1, %eax
+; X86-BMI2-NEXT:    shlxl %ebx, %eax, %eax
+; X86-BMI2-NEXT:  .LBB18_4:
+; X86-BMI2-NEXT:    decl %eax
+; X86-BMI2-NEXT:    andl %edx, %eax
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_32_a3:
 ; X64-NOBMI:       # %bb.0:
@@ -2188,20 +2188,20 @@ define i32 @bextr64_32_a3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_32_a3:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_32_a3:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_32_a3:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, %rdi, %rax
+; X64-BMI1-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_32_a3:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %onebit = shl i64 1, %numlowbits
   %mask = add nsw i64 %onebit, 4294967295
@@ -2230,22 +2230,22 @@ define i32 @bextr32_b0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr32_b0:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    orl %eax, %ecx
-; X86-BMI1NOTBM-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr32_b0:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr32_b0:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    orl %eax, %ecx
+; X86-BMI1-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr32_b0:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_b0:
 ; X64-NOBMI:       # %bb.0:
@@ -2259,19 +2259,19 @@ define i32 @bextr32_b0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr32_b0:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrl %eax, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr32_b0:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr32_b0:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrl %eax, %edi, %eax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr32_b0:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i32 %val, %numskipbits
   %notmask = shl i32 -1, %numlowbits
   %mask = xor i32 %notmask, -1
@@ -2295,22 +2295,22 @@ define i32 @bextr32_b1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext %
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr32_b1_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    orl %eax, %ecx
-; X86-BMI1NOTBM-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr32_b1_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr32_b1_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    orl %eax, %ecx
+; X86-BMI1-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr32_b1_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_b1_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -2324,19 +2324,19 @@ define i32 @bextr32_b1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext %
 ; X64-NOBMI-NEXT:    andl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr32_b1_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrl %eax, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr32_b1_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr32_b1_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrl %eax, %edi, %eax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr32_b1_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %skip = zext i8 %numskipbits to i32
   %shifted = lshr i32 %val, %skip
   %conv = zext i8 %numlowbits to i32
@@ -2363,24 +2363,24 @@ define i32 @bextr32_b2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr32_b2_load:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    shll $8, %ecx
-; X86-BMI1NOTBM-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    orl %ecx, %edx
-; X86-BMI1NOTBM-NEXT:    bextrl %edx, (%eax), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr32_b2_load:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
-; X86-BMI1BMI2-NEXT:    shrxl %edx, (%ecx), %ecx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr32_b2_load:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    shll $8, %ecx
+; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    orl %ecx, %edx
+; X86-BMI1-NEXT:    bextrl %edx, (%eax), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr32_b2_load:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT:    shrxl %edx, (%ecx), %ecx
+; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_b2_load:
 ; X64-NOBMI:       # %bb.0:
@@ -2395,19 +2395,19 @@ define i32 @bextr32_b2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind
 ; X64-NOBMI-NEXT:    andl %esi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr32_b2_load:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrl %eax, (%rdi), %eax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr32_b2_load:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxl %esi, (%rdi), %eax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr32_b2_load:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrl %eax, (%rdi), %eax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr32_b2_load:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxl %esi, (%rdi), %eax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %shifted = lshr i32 %val, %numskipbits
   %notmask = shl i32 -1, %numlowbits
@@ -2433,24 +2433,24 @@ define i32 @bextr32_b3_load_indexzext(i32* %w, i8 zeroext %numskipbits, i8 zeroe
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr32_b3_load_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    shll $8, %ecx
-; X86-BMI1NOTBM-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    orl %ecx, %edx
-; X86-BMI1NOTBM-NEXT:    bextrl %edx, (%eax), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr32_b3_load_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
-; X86-BMI1BMI2-NEXT:    shrxl %edx, (%ecx), %ecx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr32_b3_load_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    shll $8, %ecx
+; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    orl %ecx, %edx
+; X86-BMI1-NEXT:    bextrl %edx, (%eax), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr32_b3_load_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT:    shrxl %edx, (%ecx), %ecx
+; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_b3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -2465,19 +2465,19 @@ define i32 @bextr32_b3_load_indexzext(i32* %w, i8 zeroext %numskipbits, i8 zeroe
 ; X64-NOBMI-NEXT:    andl %esi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr32_b3_load_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrl %eax, (%rdi), %eax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr32_b3_load_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxl %esi, (%rdi), %eax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr32_b3_load_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrl %eax, (%rdi), %eax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr32_b3_load_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxl %esi, (%rdi), %eax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %skip = zext i8 %numskipbits to i32
   %shifted = lshr i32 %val, %skip
@@ -2504,22 +2504,22 @@ define i32 @bextr32_b4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits)
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr32_b4_commutative:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    orl %eax, %ecx
-; X86-BMI1NOTBM-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr32_b4_commutative:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr32_b4_commutative:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    orl %eax, %ecx
+; X86-BMI1-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr32_b4_commutative:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_b4_commutative:
 ; X64-NOBMI:       # %bb.0:
@@ -2533,19 +2533,19 @@ define i32 @bextr32_b4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits)
 ; X64-NOBMI-NEXT:    andl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr32_b4_commutative:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrl %eax, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr32_b4_commutative:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr32_b4_commutative:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrl %eax, %edi, %eax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr32_b4_commutative:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i32 %val, %numskipbits
   %notmask = shl i32 -1, %numlowbits
   %mask = xor i32 %notmask, -1
@@ -2577,37 +2577,37 @@ define i32 @bextr32_b5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr32_b5_skipextrauses:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    shll $8, %ecx
-; X86-BMI1NOTBM-NEXT:    movzbl %al, %edx
-; X86-BMI1NOTBM-NEXT:    orl %ecx, %edx
-; X86-BMI1NOTBM-NEXT:    bextrl %edx, {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl %eax, (%esp)
-; X86-BMI1NOTBM-NEXT:    calll use32@PLT
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    addl $8, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr32_b5_skipextrauses:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %edx, %esi
-; X86-BMI1BMI2-NEXT:    movl %ecx, (%esp)
-; X86-BMI1BMI2-NEXT:    calll use32@PLT
-; X86-BMI1BMI2-NEXT:    movl %esi, %eax
-; X86-BMI1BMI2-NEXT:    addl $8, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr32_b5_skipextrauses:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    subl $8, %esp
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    shll $8, %ecx
+; X86-BMI1-NEXT:    movzbl %al, %edx
+; X86-BMI1-NEXT:    orl %ecx, %edx
+; X86-BMI1-NEXT:    bextrl %edx, {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl %eax, (%esp)
+; X86-BMI1-NEXT:    calll use32@PLT
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    addl $8, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr32_b5_skipextrauses:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    subl $8, %esp
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    bzhil %eax, %edx, %esi
+; X86-BMI2-NEXT:    movl %ecx, (%esp)
+; X86-BMI2-NEXT:    calll use32@PLT
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    addl $8, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_b5_skipextrauses:
 ; X64-NOBMI:       # %bb.0:
@@ -2625,29 +2625,29 @@ define i32 @bextr32_b5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits
 ; X64-NOBMI-NEXT:    popq %rbx
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr32_b5_skipextrauses:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrl %eax, %edi, %ebx
-; X64-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X64-BMI1NOTBM-NEXT:    callq use32@PLT
-; X64-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr32_b5_skipextrauses:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %ebx
-; X64-BMI1BMI2-NEXT:    movl %esi, %edi
-; X64-BMI1BMI2-NEXT:    callq use32@PLT
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr32_b5_skipextrauses:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrl %eax, %edi, %ebx
+; X64-BMI1-NEXT:    movl %esi, %edi
+; X64-BMI1-NEXT:    callq use32@PLT
+; X64-BMI1-NEXT:    movl %ebx, %eax
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr32_b5_skipextrauses:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %ebx
+; X64-BMI2-NEXT:    movl %esi, %edi
+; X64-BMI2-NEXT:    callq use32@PLT
+; X64-BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i32 %val, %numskipbits
   %notmask = shl i32 -1, %numlowbits
   %mask = xor i32 %notmask, -1
@@ -2699,70 +2699,70 @@ define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %ebx
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_b0:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB25_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edx, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB25_2:
-; X86-BMI1NOTBM-NEXT:    movl $-1, %edi
-; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
-; X86-BMI1NOTBM-NEXT:    movl %eax, %ecx
-; X86-BMI1NOTBM-NEXT:    shll %cl, %ebx
-; X86-BMI1NOTBM-NEXT:    testb $32, %al
-; X86-BMI1NOTBM-NEXT:    je .LBB25_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %edi
-; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB25_4:
-; X86-BMI1NOTBM-NEXT:    andnl %edx, %edi, %edx
-; X86-BMI1NOTBM-NEXT:    andnl %esi, %ebx, %eax
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_b0:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB25_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edx, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:  .LBB25_2:
-; X86-BMI1BMI2-NEXT:    movl $-1, %esi
-; X86-BMI1BMI2-NEXT:    shlxl %ebx, %esi, %ecx
-; X86-BMI1BMI2-NEXT:    testb $32, %bl
-; X86-BMI1BMI2-NEXT:    je .LBB25_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %ecx, %esi
-; X86-BMI1BMI2-NEXT:    xorl %ecx, %ecx
-; X86-BMI1BMI2-NEXT:  .LBB25_4:
-; X86-BMI1BMI2-NEXT:    andnl %edx, %esi, %edx
-; X86-BMI1BMI2-NEXT:    andnl %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_b0:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    shrl %cl, %edx
+; X86-BMI1-NEXT:    shrdl %cl, %edi, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB25_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %edx, %esi
+; X86-BMI1-NEXT:    xorl %edx, %edx
+; X86-BMI1-NEXT:  .LBB25_2:
+; X86-BMI1-NEXT:    movl $-1, %edi
+; X86-BMI1-NEXT:    movl $-1, %ebx
+; X86-BMI1-NEXT:    movl %eax, %ecx
+; X86-BMI1-NEXT:    shll %cl, %ebx
+; X86-BMI1-NEXT:    testb $32, %al
+; X86-BMI1-NEXT:    je .LBB25_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %ebx, %edi
+; X86-BMI1-NEXT:    xorl %ebx, %ebx
+; X86-BMI1-NEXT:  .LBB25_4:
+; X86-BMI1-NEXT:    andnl %edx, %edi, %edx
+; X86-BMI1-NEXT:    andnl %esi, %ebx, %eax
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_b0:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT:    shrxl %ecx, %edx, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB25_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edx, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:  .LBB25_2:
+; X86-BMI2-NEXT:    movl $-1, %esi
+; X86-BMI2-NEXT:    shlxl %ebx, %esi, %ecx
+; X86-BMI2-NEXT:    testb $32, %bl
+; X86-BMI2-NEXT:    je .LBB25_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %ecx, %esi
+; X86-BMI2-NEXT:    xorl %ecx, %ecx
+; X86-BMI2-NEXT:  .LBB25_4:
+; X86-BMI2-NEXT:    andnl %edx, %esi, %edx
+; X86-BMI2-NEXT:    andnl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_b0:
 ; X64-NOBMI:       # %bb.0:
@@ -2776,19 +2776,19 @@ define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andq %rdi, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_b0:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_b0:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_b0:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, %rdi, %rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_b0:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhiq %rdx, %rax, %rax
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %notmask = shl i64 -1, %numlowbits
   %mask = xor i64 %notmask, -1
@@ -2837,70 +2837,70 @@ define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %
 ; X86-NOBMI-NEXT:    popl %ebx
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_b1_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB26_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edx, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB26_2:
-; X86-BMI1NOTBM-NEXT:    movl $-1, %edi
-; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
-; X86-BMI1NOTBM-NEXT:    movl %eax, %ecx
-; X86-BMI1NOTBM-NEXT:    shll %cl, %ebx
-; X86-BMI1NOTBM-NEXT:    testb $32, %al
-; X86-BMI1NOTBM-NEXT:    je .LBB26_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %edi
-; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB26_4:
-; X86-BMI1NOTBM-NEXT:    andnl %edx, %edi, %edx
-; X86-BMI1NOTBM-NEXT:    andnl %esi, %ebx, %eax
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_b1_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB26_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edx, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:  .LBB26_2:
-; X86-BMI1BMI2-NEXT:    movl $-1, %esi
-; X86-BMI1BMI2-NEXT:    shlxl %ebx, %esi, %ecx
-; X86-BMI1BMI2-NEXT:    testb $32, %bl
-; X86-BMI1BMI2-NEXT:    je .LBB26_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %ecx, %esi
-; X86-BMI1BMI2-NEXT:    xorl %ecx, %ecx
-; X86-BMI1BMI2-NEXT:  .LBB26_4:
-; X86-BMI1BMI2-NEXT:    andnl %edx, %esi, %edx
-; X86-BMI1BMI2-NEXT:    andnl %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_b1_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    shrl %cl, %edx
+; X86-BMI1-NEXT:    shrdl %cl, %edi, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB26_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %edx, %esi
+; X86-BMI1-NEXT:    xorl %edx, %edx
+; X86-BMI1-NEXT:  .LBB26_2:
+; X86-BMI1-NEXT:    movl $-1, %edi
+; X86-BMI1-NEXT:    movl $-1, %ebx
+; X86-BMI1-NEXT:    movl %eax, %ecx
+; X86-BMI1-NEXT:    shll %cl, %ebx
+; X86-BMI1-NEXT:    testb $32, %al
+; X86-BMI1-NEXT:    je .LBB26_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %ebx, %edi
+; X86-BMI1-NEXT:    xorl %ebx, %ebx
+; X86-BMI1-NEXT:  .LBB26_4:
+; X86-BMI1-NEXT:    andnl %edx, %edi, %edx
+; X86-BMI1-NEXT:    andnl %esi, %ebx, %eax
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_b1_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT:    shrxl %ecx, %edx, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB26_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edx, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:  .LBB26_2:
+; X86-BMI2-NEXT:    movl $-1, %esi
+; X86-BMI2-NEXT:    shlxl %ebx, %esi, %ecx
+; X86-BMI2-NEXT:    testb $32, %bl
+; X86-BMI2-NEXT:    je .LBB26_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %ecx, %esi
+; X86-BMI2-NEXT:    xorl %ecx, %ecx
+; X86-BMI2-NEXT:  .LBB26_4:
+; X86-BMI2-NEXT:    andnl %edx, %esi, %edx
+; X86-BMI2-NEXT:    andnl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_b1_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -2914,21 +2914,21 @@ define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %
 ; X64-NOBMI-NEXT:    andq %rdi, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_b1_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_b1_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    # kill: def $edx killed $edx def $rdx
-; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_b1_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, %rdi, %rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_b1_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhiq %rdx, %rax, %rax
+; X64-BMI2-NEXT:    retq
   %skip = zext i8 %numskipbits to i64
   %shifted = lshr i64 %val, %skip
   %conv = zext i8 %numlowbits to i64
@@ -2980,72 +2980,72 @@ define i64 @bextr64_b2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    popl %ebx
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_b2_load:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    movl (%edx), %esi
-; X86-BMI1NOTBM-NEXT:    movl 4(%edx), %edi
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB27_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edx, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB27_2:
-; X86-BMI1NOTBM-NEXT:    movl $-1, %edi
-; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
-; X86-BMI1NOTBM-NEXT:    movl %eax, %ecx
-; X86-BMI1NOTBM-NEXT:    shll %cl, %ebx
-; X86-BMI1NOTBM-NEXT:    testb $32, %al
-; X86-BMI1NOTBM-NEXT:    je .LBB27_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %edi
-; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB27_4:
-; X86-BMI1NOTBM-NEXT:    andnl %edx, %edi, %edx
-; X86-BMI1NOTBM-NEXT:    andnl %esi, %ebx, %eax
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_b2_load:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    movl (%edx), %eax
-; X86-BMI1BMI2-NEXT:    movl 4(%edx), %esi
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edx
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB27_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edx, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:  .LBB27_2:
-; X86-BMI1BMI2-NEXT:    movl $-1, %esi
-; X86-BMI1BMI2-NEXT:    shlxl %ebx, %esi, %ecx
-; X86-BMI1BMI2-NEXT:    testb $32, %bl
-; X86-BMI1BMI2-NEXT:    je .LBB27_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %ecx, %esi
-; X86-BMI1BMI2-NEXT:    xorl %ecx, %ecx
-; X86-BMI1BMI2-NEXT:  .LBB27_4:
-; X86-BMI1BMI2-NEXT:    andnl %edx, %esi, %edx
-; X86-BMI1BMI2-NEXT:    andnl %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_b2_load:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    movl (%edx), %esi
+; X86-BMI1-NEXT:    movl 4(%edx), %edi
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    shrl %cl, %edx
+; X86-BMI1-NEXT:    shrdl %cl, %edi, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB27_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %edx, %esi
+; X86-BMI1-NEXT:    xorl %edx, %edx
+; X86-BMI1-NEXT:  .LBB27_2:
+; X86-BMI1-NEXT:    movl $-1, %edi
+; X86-BMI1-NEXT:    movl $-1, %ebx
+; X86-BMI1-NEXT:    movl %eax, %ecx
+; X86-BMI1-NEXT:    shll %cl, %ebx
+; X86-BMI1-NEXT:    testb $32, %al
+; X86-BMI1-NEXT:    je .LBB27_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %ebx, %edi
+; X86-BMI1-NEXT:    xorl %ebx, %ebx
+; X86-BMI1-NEXT:  .LBB27_4:
+; X86-BMI1-NEXT:    andnl %edx, %edi, %edx
+; X86-BMI1-NEXT:    andnl %esi, %ebx, %eax
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_b2_load:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movl (%edx), %eax
+; X86-BMI2-NEXT:    movl 4(%edx), %esi
+; X86-BMI2-NEXT:    shrxl %ecx, %esi, %edx
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB27_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edx, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:  .LBB27_2:
+; X86-BMI2-NEXT:    movl $-1, %esi
+; X86-BMI2-NEXT:    shlxl %ebx, %esi, %ecx
+; X86-BMI2-NEXT:    testb $32, %bl
+; X86-BMI2-NEXT:    je .LBB27_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %ecx, %esi
+; X86-BMI2-NEXT:    xorl %ecx, %ecx
+; X86-BMI2-NEXT:  .LBB27_4:
+; X86-BMI2-NEXT:    andnl %edx, %esi, %edx
+; X86-BMI2-NEXT:    andnl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_b2_load:
 ; X64-NOBMI:       # %bb.0:
@@ -3060,19 +3060,19 @@ define i64 @bextr64_b2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X64-NOBMI-NEXT:    andq %rsi, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_b2_load:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, (%rdi), %rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_b2_load:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, (%rdi), %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_b2_load:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, (%rdi), %rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_b2_load:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, (%rdi), %rax
+; X64-BMI2-NEXT:    bzhiq %rdx, %rax, %rax
+; X64-BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %shifted = lshr i64 %val, %numskipbits
   %notmask = shl i64 -1, %numlowbits
@@ -3123,72 +3123,72 @@ define i64 @bextr64_b3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroe
 ; X86-NOBMI-NEXT:    popl %ebx
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_b3_load_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    movl (%edx), %esi
-; X86-BMI1NOTBM-NEXT:    movl 4(%edx), %edi
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB28_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edx, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB28_2:
-; X86-BMI1NOTBM-NEXT:    movl $-1, %edi
-; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
-; X86-BMI1NOTBM-NEXT:    movl %eax, %ecx
-; X86-BMI1NOTBM-NEXT:    shll %cl, %ebx
-; X86-BMI1NOTBM-NEXT:    testb $32, %al
-; X86-BMI1NOTBM-NEXT:    je .LBB28_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %edi
-; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB28_4:
-; X86-BMI1NOTBM-NEXT:    andnl %edx, %edi, %edx
-; X86-BMI1NOTBM-NEXT:    andnl %esi, %ebx, %eax
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_b3_load_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    movl (%edx), %eax
-; X86-BMI1BMI2-NEXT:    movl 4(%edx), %esi
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edx
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB28_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edx, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:  .LBB28_2:
-; X86-BMI1BMI2-NEXT:    movl $-1, %esi
-; X86-BMI1BMI2-NEXT:    shlxl %ebx, %esi, %ecx
-; X86-BMI1BMI2-NEXT:    testb $32, %bl
-; X86-BMI1BMI2-NEXT:    je .LBB28_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %ecx, %esi
-; X86-BMI1BMI2-NEXT:    xorl %ecx, %ecx
-; X86-BMI1BMI2-NEXT:  .LBB28_4:
-; X86-BMI1BMI2-NEXT:    andnl %edx, %esi, %edx
-; X86-BMI1BMI2-NEXT:    andnl %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_b3_load_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    movl (%edx), %esi
+; X86-BMI1-NEXT:    movl 4(%edx), %edi
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    shrl %cl, %edx
+; X86-BMI1-NEXT:    shrdl %cl, %edi, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB28_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %edx, %esi
+; X86-BMI1-NEXT:    xorl %edx, %edx
+; X86-BMI1-NEXT:  .LBB28_2:
+; X86-BMI1-NEXT:    movl $-1, %edi
+; X86-BMI1-NEXT:    movl $-1, %ebx
+; X86-BMI1-NEXT:    movl %eax, %ecx
+; X86-BMI1-NEXT:    shll %cl, %ebx
+; X86-BMI1-NEXT:    testb $32, %al
+; X86-BMI1-NEXT:    je .LBB28_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %ebx, %edi
+; X86-BMI1-NEXT:    xorl %ebx, %ebx
+; X86-BMI1-NEXT:  .LBB28_4:
+; X86-BMI1-NEXT:    andnl %edx, %edi, %edx
+; X86-BMI1-NEXT:    andnl %esi, %ebx, %eax
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_b3_load_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movl (%edx), %eax
+; X86-BMI2-NEXT:    movl 4(%edx), %esi
+; X86-BMI2-NEXT:    shrxl %ecx, %esi, %edx
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB28_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edx, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:  .LBB28_2:
+; X86-BMI2-NEXT:    movl $-1, %esi
+; X86-BMI2-NEXT:    shlxl %ebx, %esi, %ecx
+; X86-BMI2-NEXT:    testb $32, %bl
+; X86-BMI2-NEXT:    je .LBB28_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %ecx, %esi
+; X86-BMI2-NEXT:    xorl %ecx, %ecx
+; X86-BMI2-NEXT:  .LBB28_4:
+; X86-BMI2-NEXT:    andnl %edx, %esi, %edx
+; X86-BMI2-NEXT:    andnl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_b3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -3203,21 +3203,21 @@ define i64 @bextr64_b3_load_indexzext(i64* %w, i8 zeroext %numskipbits, i8 zeroe
 ; X64-NOBMI-NEXT:    andq %rsi, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_b3_load_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, (%rdi), %rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_b3_load_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    # kill: def $edx killed $edx def $rdx
-; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, (%rdi), %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_b3_load_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, (%rdi), %rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_b3_load_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI2-NEXT:    shrxq %rsi, (%rdi), %rax
+; X64-BMI2-NEXT:    bzhiq %rdx, %rax, %rax
+; X64-BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %skip = zext i8 %numskipbits to i64
   %shifted = lshr i64 %val, %skip
@@ -3269,70 +3269,70 @@ define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X86-NOBMI-NEXT:    popl %ebx
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_b4_commutative:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB29_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edx, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB29_2:
-; X86-BMI1NOTBM-NEXT:    movl $-1, %edi
-; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
-; X86-BMI1NOTBM-NEXT:    movl %eax, %ecx
-; X86-BMI1NOTBM-NEXT:    shll %cl, %ebx
-; X86-BMI1NOTBM-NEXT:    testb $32, %al
-; X86-BMI1NOTBM-NEXT:    je .LBB29_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %edi
-; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB29_4:
-; X86-BMI1NOTBM-NEXT:    andnl %edx, %edi, %edx
-; X86-BMI1NOTBM-NEXT:    andnl %esi, %ebx, %eax
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_b4_commutative:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB29_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edx, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:  .LBB29_2:
-; X86-BMI1BMI2-NEXT:    movl $-1, %esi
-; X86-BMI1BMI2-NEXT:    shlxl %ebx, %esi, %ecx
-; X86-BMI1BMI2-NEXT:    testb $32, %bl
-; X86-BMI1BMI2-NEXT:    je .LBB29_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %ecx, %esi
-; X86-BMI1BMI2-NEXT:    xorl %ecx, %ecx
-; X86-BMI1BMI2-NEXT:  .LBB29_4:
-; X86-BMI1BMI2-NEXT:    andnl %edx, %esi, %edx
-; X86-BMI1BMI2-NEXT:    andnl %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_b4_commutative:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    shrl %cl, %edx
+; X86-BMI1-NEXT:    shrdl %cl, %edi, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB29_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %edx, %esi
+; X86-BMI1-NEXT:    xorl %edx, %edx
+; X86-BMI1-NEXT:  .LBB29_2:
+; X86-BMI1-NEXT:    movl $-1, %edi
+; X86-BMI1-NEXT:    movl $-1, %ebx
+; X86-BMI1-NEXT:    movl %eax, %ecx
+; X86-BMI1-NEXT:    shll %cl, %ebx
+; X86-BMI1-NEXT:    testb $32, %al
+; X86-BMI1-NEXT:    je .LBB29_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %ebx, %edi
+; X86-BMI1-NEXT:    xorl %ebx, %ebx
+; X86-BMI1-NEXT:  .LBB29_4:
+; X86-BMI1-NEXT:    andnl %edx, %edi, %edx
+; X86-BMI1-NEXT:    andnl %esi, %ebx, %eax
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_b4_commutative:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT:    shrxl %ecx, %edx, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB29_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edx, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:  .LBB29_2:
+; X86-BMI2-NEXT:    movl $-1, %esi
+; X86-BMI2-NEXT:    shlxl %ebx, %esi, %ecx
+; X86-BMI2-NEXT:    testb $32, %bl
+; X86-BMI2-NEXT:    je .LBB29_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %ecx, %esi
+; X86-BMI2-NEXT:    xorl %ecx, %ecx
+; X86-BMI2-NEXT:  .LBB29_4:
+; X86-BMI2-NEXT:    andnl %edx, %esi, %edx
+; X86-BMI2-NEXT:    andnl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_b4_commutative:
 ; X64-NOBMI:       # %bb.0:
@@ -3346,19 +3346,19 @@ define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X64-NOBMI-NEXT:    andq %rdi, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_b4_commutative:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_b4_commutative:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_b4_commutative:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, %rdi, %rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_b4_commutative:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhiq %rdx, %rax, %rax
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %notmask = shl i64 -1, %numlowbits
   %mask = xor i64 %notmask, -1
@@ -3419,96 +3419,96 @@ define i64 @bextr64_b5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-NOBMI-NEXT:    popl %ebp
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_b5_skipextrauses:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebp
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    subl $12, %esp
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %dl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %esi
-; X86-BMI1NOTBM-NEXT:    movl %eax, %ecx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebx, %edi
-; X86-BMI1NOTBM-NEXT:    testb $32, %al
-; X86-BMI1NOTBM-NEXT:    je .LBB30_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB30_2:
-; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
-; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
-; X86-BMI1NOTBM-NEXT:    movl %edx, %ecx
-; X86-BMI1NOTBM-NEXT:    shll %cl, %ebp
-; X86-BMI1NOTBM-NEXT:    testb $32, %dl
-; X86-BMI1NOTBM-NEXT:    je .LBB30_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %ebp, %ebx
-; X86-BMI1NOTBM-NEXT:    xorl %ebp, %ebp
-; X86-BMI1NOTBM-NEXT:  .LBB30_4:
-; X86-BMI1NOTBM-NEXT:    andnl %esi, %ebx, %esi
-; X86-BMI1NOTBM-NEXT:    andnl %edi, %ebp, %edi
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-BMI1NOTBM-NEXT:    pushl %eax
-; X86-BMI1NOTBM-NEXT:    calll use64@PLT
-; X86-BMI1NOTBM-NEXT:    addl $16, %esp
-; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:    addl $12, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    popl %ebp
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_b5_skipextrauses:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebp
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    subl $12, %esp
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB30_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edx, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:  .LBB30_2:
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; X86-BMI1BMI2-NEXT:    movl $-1, %esi
-; X86-BMI1BMI2-NEXT:    shlxl %ebx, %esi, %edi
-; X86-BMI1BMI2-NEXT:    testb $32, %bl
-; X86-BMI1BMI2-NEXT:    je .LBB30_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB30_4:
-; X86-BMI1BMI2-NEXT:    andnl %edx, %esi, %esi
-; X86-BMI1BMI2-NEXT:    andnl %eax, %edi, %edi
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    pushl %ebp
-; X86-BMI1BMI2-NEXT:    pushl %ecx
-; X86-BMI1BMI2-NEXT:    calll use64@PLT
-; X86-BMI1BMI2-NEXT:    addl $16, %esp
-; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:    movl %esi, %edx
-; X86-BMI1BMI2-NEXT:    addl $12, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    popl %ebp
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_b5_skipextrauses:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebp
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    subl $12, %esp
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl %ebx, %esi
+; X86-BMI1-NEXT:    movl %eax, %ecx
+; X86-BMI1-NEXT:    shrl %cl, %esi
+; X86-BMI1-NEXT:    shrdl %cl, %ebx, %edi
+; X86-BMI1-NEXT:    testb $32, %al
+; X86-BMI1-NEXT:    je .LBB30_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %esi, %edi
+; X86-BMI1-NEXT:    xorl %esi, %esi
+; X86-BMI1-NEXT:  .LBB30_2:
+; X86-BMI1-NEXT:    movl $-1, %ebx
+; X86-BMI1-NEXT:    movl $-1, %ebp
+; X86-BMI1-NEXT:    movl %edx, %ecx
+; X86-BMI1-NEXT:    shll %cl, %ebp
+; X86-BMI1-NEXT:    testb $32, %dl
+; X86-BMI1-NEXT:    je .LBB30_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %ebp, %ebx
+; X86-BMI1-NEXT:    xorl %ebp, %ebp
+; X86-BMI1-NEXT:  .LBB30_4:
+; X86-BMI1-NEXT:    andnl %esi, %ebx, %esi
+; X86-BMI1-NEXT:    andnl %edi, %ebp, %edi
+; X86-BMI1-NEXT:    subl $8, %esp
+; X86-BMI1-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    pushl %eax
+; X86-BMI1-NEXT:    calll use64@PLT
+; X86-BMI1-NEXT:    addl $16, %esp
+; X86-BMI1-NEXT:    movl %edi, %eax
+; X86-BMI1-NEXT:    movl %esi, %edx
+; X86-BMI1-NEXT:    addl $12, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    popl %ebp
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_b5_skipextrauses:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebp
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    subl $12, %esp
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT:    shrxl %ecx, %edx, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB30_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edx, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:  .LBB30_2:
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-BMI2-NEXT:    movl $-1, %esi
+; X86-BMI2-NEXT:    shlxl %ebx, %esi, %edi
+; X86-BMI2-NEXT:    testb $32, %bl
+; X86-BMI2-NEXT:    je .LBB30_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %edi, %esi
+; X86-BMI2-NEXT:    xorl %edi, %edi
+; X86-BMI2-NEXT:  .LBB30_4:
+; X86-BMI2-NEXT:    andnl %edx, %esi, %esi
+; X86-BMI2-NEXT:    andnl %eax, %edi, %edi
+; X86-BMI2-NEXT:    subl $8, %esp
+; X86-BMI2-NEXT:    pushl %ebp
+; X86-BMI2-NEXT:    pushl %ecx
+; X86-BMI2-NEXT:    calll use64@PLT
+; X86-BMI2-NEXT:    addl $16, %esp
+; X86-BMI2-NEXT:    movl %edi, %eax
+; X86-BMI2-NEXT:    movl %esi, %edx
+; X86-BMI2-NEXT:    addl $12, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    popl %ebp
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_b5_skipextrauses:
 ; X64-NOBMI:       # %bb.0:
@@ -3526,29 +3526,29 @@ define i64 @bextr64_b5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X64-NOBMI-NEXT:    popq %rbx
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_b5_skipextrauses:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rsi, %rdi
-; X64-BMI1NOTBM-NEXT:    callq use64@PLT
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_b5_skipextrauses:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rbx
-; X64-BMI1BMI2-NEXT:    movq %rsi, %rdi
-; X64-BMI1BMI2-NEXT:    callq use64@PLT
-; X64-BMI1BMI2-NEXT:    movq %rbx, %rax
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_b5_skipextrauses:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, %rdi, %rbx
+; X64-BMI1-NEXT:    movq %rsi, %rdi
+; X64-BMI1-NEXT:    callq use64@PLT
+; X64-BMI1-NEXT:    movq %rbx, %rax
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_b5_skipextrauses:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhiq %rdx, %rax, %rbx
+; X64-BMI2-NEXT:    movq %rsi, %rdi
+; X64-BMI2-NEXT:    callq use64@PLT
+; X64-BMI2-NEXT:    movq %rbx, %rax
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %notmask = shl i64 -1, %numlowbits
   %mask = xor i64 %notmask, -1
@@ -3592,59 +3592,59 @@ define i32 @bextr64_32_b0(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_32_b0:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB31_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB31_2:
-; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
-; X86-BMI1NOTBM-NEXT:    movl %eax, %ecx
-; X86-BMI1NOTBM-NEXT:    shll %cl, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT:    testb $32, %al
-; X86-BMI1NOTBM-NEXT:    jne .LBB31_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %ecx
-; X86-BMI1NOTBM-NEXT:  .LBB31_4:
-; X86-BMI1NOTBM-NEXT:    andnl %edx, %ecx, %eax
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_32_b0:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB31_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edx
-; X86-BMI1BMI2-NEXT:  .LBB31_2:
-; X86-BMI1BMI2-NEXT:    xorl %ecx, %ecx
-; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    jne .LBB31_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI1BMI2-NEXT:    shlxl %eax, %ecx, %ecx
-; X86-BMI1BMI2-NEXT:  .LBB31_4:
-; X86-BMI1BMI2-NEXT:    andnl %edx, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_32_b0:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    shrl %cl, %edx
+; X86-BMI1-NEXT:    shrdl %cl, %edi, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB31_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %esi, %edx
+; X86-BMI1-NEXT:  .LBB31_2:
+; X86-BMI1-NEXT:    movl $-1, %esi
+; X86-BMI1-NEXT:    movl %eax, %ecx
+; X86-BMI1-NEXT:    shll %cl, %esi
+; X86-BMI1-NEXT:    xorl %ecx, %ecx
+; X86-BMI1-NEXT:    testb $32, %al
+; X86-BMI1-NEXT:    jne .LBB31_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %esi, %ecx
+; X86-BMI1-NEXT:  .LBB31_4:
+; X86-BMI1-NEXT:    andnl %edx, %ecx, %eax
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_32_b0:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB31_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    shrxl %ecx, %esi, %edx
+; X86-BMI2-NEXT:  .LBB31_2:
+; X86-BMI2-NEXT:    xorl %ecx, %ecx
+; X86-BMI2-NEXT:    testb $32, %al
+; X86-BMI2-NEXT:    jne .LBB31_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI2-NEXT:    shlxl %eax, %ecx, %ecx
+; X86-BMI2-NEXT:  .LBB31_4:
+; X86-BMI2-NEXT:    andnl %edx, %ecx, %eax
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_32_b0:
 ; X64-NOBMI:       # %bb.0:
@@ -3659,20 +3659,20 @@ define i32 @bextr64_32_b0(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_32_b0:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_32_b0:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_32_b0:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, %rdi, %rax
+; X64-BMI1-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_32_b0:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %shiftedval = lshr i64 %val, %numskipbits
   %widenumlowbits = zext i8 %numlowbits to i64
   %notmask = shl nsw i64 -1, %widenumlowbits
@@ -3709,44 +3709,44 @@ define i32 @bextr64_32_b1(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_32_b1:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB32_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB32_2:
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, %edx, %eax
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_32_b1:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB32_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edx
-; X86-BMI1BMI2-NEXT:  .LBB32_2:
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %edx, %eax
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_32_b1:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    shrl %cl, %edx
+; X86-BMI1-NEXT:    shrdl %cl, %edi, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB32_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %esi, %edx
+; X86-BMI1-NEXT:  .LBB32_2:
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    bextrl %eax, %edx, %eax
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_32_b1:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB32_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    shrxl %ecx, %esi, %edx
+; X86-BMI2-NEXT:  .LBB32_2:
+; X86-BMI2-NEXT:    bzhil %eax, %edx, %eax
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_32_b1:
 ; X64-NOBMI:       # %bb.0:
@@ -3760,20 +3760,20 @@ define i32 @bextr64_32_b1(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_32_b1:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_32_b1:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_32_b1:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, %rdi, %rax
+; X64-BMI1-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_32_b1:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %shiftedval = lshr i64 %val, %numskipbits
   %truncshiftedval = trunc i64 %shiftedval to i32
   %widenumlowbits = zext i8 %numlowbits to i32
@@ -3811,44 +3811,44 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_32_b2:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB33_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB33_2:
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, %edx, %eax
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_32_b2:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB33_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edx
-; X86-BMI1BMI2-NEXT:  .LBB33_2:
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %edx, %eax
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_32_b2:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    shrl %cl, %edx
+; X86-BMI1-NEXT:    shrdl %cl, %edi, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB33_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %esi, %edx
+; X86-BMI1-NEXT:  .LBB33_2:
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    bextrl %eax, %edx, %eax
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_32_b2:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB33_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    shrxl %ecx, %esi, %edx
+; X86-BMI2-NEXT:  .LBB33_2:
+; X86-BMI2-NEXT:    bzhil %eax, %edx, %eax
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_32_b2:
 ; X64-NOBMI:       # %bb.0:
@@ -3862,20 +3862,20 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_32_b2:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_32_b2:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_32_b2:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, %rdi, %rax
+; X64-BMI1-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_32_b2:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %shiftedval = lshr i64 %val, %numskipbits
   %widenumlowbits = zext i8 %numlowbits to i32
   %notmask = shl nsw i32 -1, %widenumlowbits
@@ -3920,59 +3920,59 @@ define i32 @bextr64_32_b3(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_32_b3:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB34_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB34_2:
-; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
-; X86-BMI1NOTBM-NEXT:    movl %eax, %ecx
-; X86-BMI1NOTBM-NEXT:    shll %cl, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT:    testb $32, %al
-; X86-BMI1NOTBM-NEXT:    jne .LBB34_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %ecx
-; X86-BMI1NOTBM-NEXT:  .LBB34_4:
-; X86-BMI1NOTBM-NEXT:    andnl %edx, %ecx, %eax
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_32_b3:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB34_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edx
-; X86-BMI1BMI2-NEXT:  .LBB34_2:
-; X86-BMI1BMI2-NEXT:    xorl %ecx, %ecx
-; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    jne .LBB34_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI1BMI2-NEXT:    shlxl %eax, %ecx, %ecx
-; X86-BMI1BMI2-NEXT:  .LBB34_4:
-; X86-BMI1BMI2-NEXT:    andnl %edx, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_32_b3:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    shrl %cl, %edx
+; X86-BMI1-NEXT:    shrdl %cl, %edi, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB34_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %esi, %edx
+; X86-BMI1-NEXT:  .LBB34_2:
+; X86-BMI1-NEXT:    movl $-1, %esi
+; X86-BMI1-NEXT:    movl %eax, %ecx
+; X86-BMI1-NEXT:    shll %cl, %esi
+; X86-BMI1-NEXT:    xorl %ecx, %ecx
+; X86-BMI1-NEXT:    testb $32, %al
+; X86-BMI1-NEXT:    jne .LBB34_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %esi, %ecx
+; X86-BMI1-NEXT:  .LBB34_4:
+; X86-BMI1-NEXT:    andnl %edx, %ecx, %eax
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_32_b3:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB34_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    shrxl %ecx, %esi, %edx
+; X86-BMI2-NEXT:  .LBB34_2:
+; X86-BMI2-NEXT:    xorl %ecx, %ecx
+; X86-BMI2-NEXT:    testb $32, %al
+; X86-BMI2-NEXT:    jne .LBB34_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI2-NEXT:    shlxl %eax, %ecx, %ecx
+; X86-BMI2-NEXT:  .LBB34_4:
+; X86-BMI2-NEXT:    andnl %edx, %ecx, %eax
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_32_b3:
 ; X64-NOBMI:       # %bb.0:
@@ -3988,20 +3988,20 @@ define i32 @bextr64_32_b3(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_32_b3:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_32_b3:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_32_b3:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, %rdi, %rax
+; X64-BMI1-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_32_b3:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %shiftedval = lshr i64 %val, %numskipbits
   %widenumlowbits = zext i8 %numlowbits to i64
   %notmask = shl nsw i64 4294967295, %widenumlowbits
@@ -4038,47 +4038,47 @@ define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr32_c0:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    pushl %eax
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
-; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
-; X86-BMI1NOTBM-NEXT:    movl %esi, (%esp)
-; X86-BMI1NOTBM-NEXT:    calll use32@PLT
-; X86-BMI1NOTBM-NEXT:    andl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    addl $4, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr32_c0:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    pushl %eax
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X86-BMI1BMI2-NEXT:    negb %al
-; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
-; X86-BMI1BMI2-NEXT:    calll use32@PLT
-; X86-BMI1BMI2-NEXT:    bzhil %ebx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    addl $4, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr32_c0:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    pushl %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    shrl %cl, %edi
+; X86-BMI1-NEXT:    xorl %ecx, %ecx
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %esi
+; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI1-NEXT:    shrl %cl, %esi
+; X86-BMI1-NEXT:    movl %esi, (%esp)
+; X86-BMI1-NEXT:    calll use32@PLT
+; X86-BMI1-NEXT:    andl %edi, %esi
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    addl $4, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr32_c0:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    pushl %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl %ebx, %eax
+; X86-BMI2-NEXT:    negb %al
+; X86-BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    movl %eax, (%esp)
+; X86-BMI2-NEXT:    calll use32@PLT
+; X86-BMI2-NEXT:    bzhil %ebx, %esi, %eax
+; X86-BMI2-NEXT:    addl $4, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_c0:
 ; X64-NOBMI:       # %bb.0:
@@ -4102,45 +4102,45 @@ define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    popq %rbp
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr32_c0:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %rbp
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    pushq %rax
-; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
-; X64-BMI1NOTBM-NEXT:    movl %edi, %ebx
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebx
-; X64-BMI1NOTBM-NEXT:    negb %dl
-; X64-BMI1NOTBM-NEXT:    movl $-1, %ebp
-; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
-; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebp
-; X64-BMI1NOTBM-NEXT:    movl %ebp, %edi
-; X64-BMI1NOTBM-NEXT:    callq use32@PLT
-; X64-BMI1NOTBM-NEXT:    andl %ebx, %ebp
-; X64-BMI1NOTBM-NEXT:    movl %ebp, %eax
-; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    popq %rbp
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr32_c0:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %rbp
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    pushq %rax
-; X64-BMI1BMI2-NEXT:    movl %edx, %ebx
-; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %ebp
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
-; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %edi
-; X64-BMI1BMI2-NEXT:    callq use32@PLT
-; X64-BMI1BMI2-NEXT:    bzhil %ebx, %ebp, %eax
-; X64-BMI1BMI2-NEXT:    addq $8, %rsp
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    popq %rbp
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr32_c0:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %rbp
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    pushq %rax
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    movl %edi, %ebx
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shrl %cl, %ebx
+; X64-BMI1-NEXT:    negb %dl
+; X64-BMI1-NEXT:    movl $-1, %ebp
+; X64-BMI1-NEXT:    movl %edx, %ecx
+; X64-BMI1-NEXT:    shrl %cl, %ebp
+; X64-BMI1-NEXT:    movl %ebp, %edi
+; X64-BMI1-NEXT:    callq use32@PLT
+; X64-BMI1-NEXT:    andl %ebx, %ebp
+; X64-BMI1-NEXT:    movl %ebp, %eax
+; X64-BMI1-NEXT:    addq $8, %rsp
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    popq %rbp
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr32_c0:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %rbp
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    pushq %rax
+; X64-BMI2-NEXT:    movl %edx, %ebx
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %ebp
+; X64-BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI2-NEXT:    negb %al
+; X64-BMI2-NEXT:    movl $-1, %ecx
+; X64-BMI2-NEXT:    shrxl %eax, %ecx, %edi
+; X64-BMI2-NEXT:    callq use32@PLT
+; X64-BMI2-NEXT:    bzhil %ebx, %ebp, %eax
+; X64-BMI2-NEXT:    addq $8, %rsp
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    popq %rbp
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i32 %val, %numskipbits
   %numhighbits = sub i32 32, %numlowbits
   %mask = lshr i32 -1, %numhighbits
@@ -4172,47 +4172,47 @@ define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr32_c1_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    pushl %eax
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
-; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
-; X86-BMI1NOTBM-NEXT:    movl %esi, (%esp)
-; X86-BMI1NOTBM-NEXT:    calll use32@PLT
-; X86-BMI1NOTBM-NEXT:    andl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    addl $4, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr32_c1_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    pushl %eax
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X86-BMI1BMI2-NEXT:    negb %al
-; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
-; X86-BMI1BMI2-NEXT:    calll use32@PLT
-; X86-BMI1BMI2-NEXT:    bzhil %ebx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    addl $4, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr32_c1_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    pushl %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    shrl %cl, %edi
+; X86-BMI1-NEXT:    xorl %ecx, %ecx
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %esi
+; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI1-NEXT:    shrl %cl, %esi
+; X86-BMI1-NEXT:    movl %esi, (%esp)
+; X86-BMI1-NEXT:    calll use32@PLT
+; X86-BMI1-NEXT:    andl %edi, %esi
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    addl $4, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr32_c1_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    pushl %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl %ebx, %eax
+; X86-BMI2-NEXT:    negb %al
+; X86-BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    movl %eax, (%esp)
+; X86-BMI2-NEXT:    calll use32@PLT
+; X86-BMI2-NEXT:    bzhil %ebx, %esi, %eax
+; X86-BMI2-NEXT:    addl $4, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_c1_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -4236,45 +4236,45 @@ define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X64-NOBMI-NEXT:    popq %rbp
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr32_c1_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %rbp
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    pushq %rax
-; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
-; X64-BMI1NOTBM-NEXT:    movl %edi, %ebx
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebx
-; X64-BMI1NOTBM-NEXT:    negb %dl
-; X64-BMI1NOTBM-NEXT:    movl $-1, %ebp
-; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
-; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebp
-; X64-BMI1NOTBM-NEXT:    movl %ebp, %edi
-; X64-BMI1NOTBM-NEXT:    callq use32@PLT
-; X64-BMI1NOTBM-NEXT:    andl %ebx, %ebp
-; X64-BMI1NOTBM-NEXT:    movl %ebp, %eax
-; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    popq %rbp
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr32_c1_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %rbp
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    pushq %rax
-; X64-BMI1BMI2-NEXT:    movl %edx, %ebx
-; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %ebp
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
-; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %edi
-; X64-BMI1BMI2-NEXT:    callq use32@PLT
-; X64-BMI1BMI2-NEXT:    bzhil %ebx, %ebp, %eax
-; X64-BMI1BMI2-NEXT:    addq $8, %rsp
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    popq %rbp
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr32_c1_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %rbp
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    pushq %rax
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    movl %edi, %ebx
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shrl %cl, %ebx
+; X64-BMI1-NEXT:    negb %dl
+; X64-BMI1-NEXT:    movl $-1, %ebp
+; X64-BMI1-NEXT:    movl %edx, %ecx
+; X64-BMI1-NEXT:    shrl %cl, %ebp
+; X64-BMI1-NEXT:    movl %ebp, %edi
+; X64-BMI1-NEXT:    callq use32@PLT
+; X64-BMI1-NEXT:    andl %ebx, %ebp
+; X64-BMI1-NEXT:    movl %ebp, %eax
+; X64-BMI1-NEXT:    addq $8, %rsp
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    popq %rbp
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr32_c1_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %rbp
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    pushq %rax
+; X64-BMI2-NEXT:    movl %edx, %ebx
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %ebp
+; X64-BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI2-NEXT:    negb %al
+; X64-BMI2-NEXT:    movl $-1, %ecx
+; X64-BMI2-NEXT:    shrxl %eax, %ecx, %edi
+; X64-BMI2-NEXT:    callq use32@PLT
+; X64-BMI2-NEXT:    bzhil %ebx, %ebp, %eax
+; X64-BMI2-NEXT:    addq $8, %rsp
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    popq %rbp
+; X64-BMI2-NEXT:    retq
   %skip = zext i8 %numskipbits to i32
   %shifted = lshr i32 %val, %skip
   %numhighbits = sub i8 32, %numlowbits
@@ -4309,49 +4309,49 @@ define i32 @bextr32_c2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr32_c2_load:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    pushl %eax
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl (%eax), %edi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
-; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
-; X86-BMI1NOTBM-NEXT:    movl %esi, (%esp)
-; X86-BMI1NOTBM-NEXT:    calll use32@PLT
-; X86-BMI1NOTBM-NEXT:    andl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    addl $4, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr32_c2_load:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    pushl %eax
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, (%eax), %esi
-; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X86-BMI1BMI2-NEXT:    negb %al
-; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
-; X86-BMI1BMI2-NEXT:    calll use32@PLT
-; X86-BMI1BMI2-NEXT:    bzhil %ebx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    addl $4, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr32_c2_load:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    pushl %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl (%eax), %edi
+; X86-BMI1-NEXT:    shrl %cl, %edi
+; X86-BMI1-NEXT:    xorl %ecx, %ecx
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %esi
+; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI1-NEXT:    shrl %cl, %esi
+; X86-BMI1-NEXT:    movl %esi, (%esp)
+; X86-BMI1-NEXT:    calll use32@PLT
+; X86-BMI1-NEXT:    andl %edi, %esi
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    addl $4, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr32_c2_load:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    pushl %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    shrxl %ecx, (%eax), %esi
+; X86-BMI2-NEXT:    movl %ebx, %eax
+; X86-BMI2-NEXT:    negb %al
+; X86-BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    movl %eax, (%esp)
+; X86-BMI2-NEXT:    calll use32@PLT
+; X86-BMI2-NEXT:    bzhil %ebx, %esi, %eax
+; X86-BMI2-NEXT:    addl $4, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_c2_load:
 ; X64-NOBMI:       # %bb.0:
@@ -4375,45 +4375,45 @@ define i32 @bextr32_c2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind
 ; X64-NOBMI-NEXT:    popq %rbp
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr32_c2_load:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %rbp
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    pushq %rax
-; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
-; X64-BMI1NOTBM-NEXT:    movl (%rdi), %ebp
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebp
-; X64-BMI1NOTBM-NEXT:    negb %dl
-; X64-BMI1NOTBM-NEXT:    movl $-1, %ebx
-; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
-; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebx
-; X64-BMI1NOTBM-NEXT:    movl %ebx, %edi
-; X64-BMI1NOTBM-NEXT:    callq use32@PLT
-; X64-BMI1NOTBM-NEXT:    andl %ebp, %ebx
-; X64-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    popq %rbp
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr32_c2_load:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %rbp
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    pushq %rax
-; X64-BMI1BMI2-NEXT:    movl %edx, %ebx
-; X64-BMI1BMI2-NEXT:    shrxl %esi, (%rdi), %ebp
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
-; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %edi
-; X64-BMI1BMI2-NEXT:    callq use32@PLT
-; X64-BMI1BMI2-NEXT:    bzhil %ebx, %ebp, %eax
-; X64-BMI1BMI2-NEXT:    addq $8, %rsp
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    popq %rbp
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr32_c2_load:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %rbp
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    pushq %rax
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    movl (%rdi), %ebp
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shrl %cl, %ebp
+; X64-BMI1-NEXT:    negb %dl
+; X64-BMI1-NEXT:    movl $-1, %ebx
+; X64-BMI1-NEXT:    movl %edx, %ecx
+; X64-BMI1-NEXT:    shrl %cl, %ebx
+; X64-BMI1-NEXT:    movl %ebx, %edi
+; X64-BMI1-NEXT:    callq use32@PLT
+; X64-BMI1-NEXT:    andl %ebp, %ebx
+; X64-BMI1-NEXT:    movl %ebx, %eax
+; X64-BMI1-NEXT:    addq $8, %rsp
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    popq %rbp
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr32_c2_load:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %rbp
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    pushq %rax
+; X64-BMI2-NEXT:    movl %edx, %ebx
+; X64-BMI2-NEXT:    shrxl %esi, (%rdi), %ebp
+; X64-BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI2-NEXT:    negb %al
+; X64-BMI2-NEXT:    movl $-1, %ecx
+; X64-BMI2-NEXT:    shrxl %eax, %ecx, %edi
+; X64-BMI2-NEXT:    callq use32@PLT
+; X64-BMI2-NEXT:    bzhil %ebx, %ebp, %eax
+; X64-BMI2-NEXT:    addq $8, %rsp
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    popq %rbp
+; X64-BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %shifted = lshr i32 %val, %numskipbits
   %numhighbits = sub i32 32, %numlowbits
@@ -4447,49 +4447,49 @@ define i32 @bextr32_c3_load_indexzext(i32* %w, i8 %numskipbits, i8 %numlowbits)
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr32_c3_load_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    pushl %eax
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl (%eax), %edi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
-; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
-; X86-BMI1NOTBM-NEXT:    movl %esi, (%esp)
-; X86-BMI1NOTBM-NEXT:    calll use32@PLT
-; X86-BMI1NOTBM-NEXT:    andl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    addl $4, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr32_c3_load_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    pushl %eax
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, (%eax), %esi
-; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X86-BMI1BMI2-NEXT:    negb %al
-; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
-; X86-BMI1BMI2-NEXT:    calll use32@PLT
-; X86-BMI1BMI2-NEXT:    bzhil %ebx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    addl $4, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr32_c3_load_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    pushl %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl (%eax), %edi
+; X86-BMI1-NEXT:    shrl %cl, %edi
+; X86-BMI1-NEXT:    xorl %ecx, %ecx
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %esi
+; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI1-NEXT:    shrl %cl, %esi
+; X86-BMI1-NEXT:    movl %esi, (%esp)
+; X86-BMI1-NEXT:    calll use32@PLT
+; X86-BMI1-NEXT:    andl %edi, %esi
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    addl $4, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr32_c3_load_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    pushl %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    shrxl %ecx, (%eax), %esi
+; X86-BMI2-NEXT:    movl %ebx, %eax
+; X86-BMI2-NEXT:    negb %al
+; X86-BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    movl %eax, (%esp)
+; X86-BMI2-NEXT:    calll use32@PLT
+; X86-BMI2-NEXT:    bzhil %ebx, %esi, %eax
+; X86-BMI2-NEXT:    addl $4, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_c3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -4513,45 +4513,45 @@ define i32 @bextr32_c3_load_indexzext(i32* %w, i8 %numskipbits, i8 %numlowbits)
 ; X64-NOBMI-NEXT:    popq %rbp
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr32_c3_load_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %rbp
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    pushq %rax
-; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
-; X64-BMI1NOTBM-NEXT:    movl (%rdi), %ebp
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebp
-; X64-BMI1NOTBM-NEXT:    negb %dl
-; X64-BMI1NOTBM-NEXT:    movl $-1, %ebx
-; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
-; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebx
-; X64-BMI1NOTBM-NEXT:    movl %ebx, %edi
-; X64-BMI1NOTBM-NEXT:    callq use32@PLT
-; X64-BMI1NOTBM-NEXT:    andl %ebp, %ebx
-; X64-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    popq %rbp
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr32_c3_load_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %rbp
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    pushq %rax
-; X64-BMI1BMI2-NEXT:    movl %edx, %ebx
-; X64-BMI1BMI2-NEXT:    shrxl %esi, (%rdi), %ebp
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
-; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %edi
-; X64-BMI1BMI2-NEXT:    callq use32@PLT
-; X64-BMI1BMI2-NEXT:    bzhil %ebx, %ebp, %eax
-; X64-BMI1BMI2-NEXT:    addq $8, %rsp
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    popq %rbp
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr32_c3_load_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %rbp
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    pushq %rax
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    movl (%rdi), %ebp
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shrl %cl, %ebp
+; X64-BMI1-NEXT:    negb %dl
+; X64-BMI1-NEXT:    movl $-1, %ebx
+; X64-BMI1-NEXT:    movl %edx, %ecx
+; X64-BMI1-NEXT:    shrl %cl, %ebx
+; X64-BMI1-NEXT:    movl %ebx, %edi
+; X64-BMI1-NEXT:    callq use32@PLT
+; X64-BMI1-NEXT:    andl %ebp, %ebx
+; X64-BMI1-NEXT:    movl %ebx, %eax
+; X64-BMI1-NEXT:    addq $8, %rsp
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    popq %rbp
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr32_c3_load_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %rbp
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    pushq %rax
+; X64-BMI2-NEXT:    movl %edx, %ebx
+; X64-BMI2-NEXT:    shrxl %esi, (%rdi), %ebp
+; X64-BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI2-NEXT:    negb %al
+; X64-BMI2-NEXT:    movl $-1, %ecx
+; X64-BMI2-NEXT:    shrxl %eax, %ecx, %edi
+; X64-BMI2-NEXT:    callq use32@PLT
+; X64-BMI2-NEXT:    bzhil %ebx, %ebp, %eax
+; X64-BMI2-NEXT:    addq $8, %rsp
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    popq %rbp
+; X64-BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %skip = zext i8 %numskipbits to i32
   %shifted = lshr i32 %val, %skip
@@ -4586,47 +4586,47 @@ define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits)
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr32_c4_commutative:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    pushl %eax
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
-; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
-; X86-BMI1NOTBM-NEXT:    movl %esi, (%esp)
-; X86-BMI1NOTBM-NEXT:    calll use32@PLT
-; X86-BMI1NOTBM-NEXT:    andl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    addl $4, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr32_c4_commutative:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    pushl %eax
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X86-BMI1BMI2-NEXT:    negb %al
-; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
-; X86-BMI1BMI2-NEXT:    calll use32@PLT
-; X86-BMI1BMI2-NEXT:    bzhil %ebx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    addl $4, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr32_c4_commutative:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    pushl %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    shrl %cl, %edi
+; X86-BMI1-NEXT:    xorl %ecx, %ecx
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %esi
+; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI1-NEXT:    shrl %cl, %esi
+; X86-BMI1-NEXT:    movl %esi, (%esp)
+; X86-BMI1-NEXT:    calll use32@PLT
+; X86-BMI1-NEXT:    andl %edi, %esi
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    addl $4, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr32_c4_commutative:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    pushl %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl %ebx, %eax
+; X86-BMI2-NEXT:    negb %al
+; X86-BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    movl %eax, (%esp)
+; X86-BMI2-NEXT:    calll use32@PLT
+; X86-BMI2-NEXT:    bzhil %ebx, %esi, %eax
+; X86-BMI2-NEXT:    addl $4, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_c4_commutative:
 ; X64-NOBMI:       # %bb.0:
@@ -4650,45 +4650,45 @@ define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits)
 ; X64-NOBMI-NEXT:    popq %rbp
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr32_c4_commutative:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %rbp
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    pushq %rax
-; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
-; X64-BMI1NOTBM-NEXT:    movl %edi, %ebx
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebx
-; X64-BMI1NOTBM-NEXT:    negb %dl
-; X64-BMI1NOTBM-NEXT:    movl $-1, %ebp
-; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
-; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebp
-; X64-BMI1NOTBM-NEXT:    movl %ebp, %edi
-; X64-BMI1NOTBM-NEXT:    callq use32@PLT
-; X64-BMI1NOTBM-NEXT:    andl %ebx, %ebp
-; X64-BMI1NOTBM-NEXT:    movl %ebp, %eax
-; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    popq %rbp
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr32_c4_commutative:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %rbp
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    pushq %rax
-; X64-BMI1BMI2-NEXT:    movl %edx, %ebx
-; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %ebp
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
-; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %edi
-; X64-BMI1BMI2-NEXT:    callq use32@PLT
-; X64-BMI1BMI2-NEXT:    bzhil %ebx, %ebp, %eax
-; X64-BMI1BMI2-NEXT:    addq $8, %rsp
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    popq %rbp
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr32_c4_commutative:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %rbp
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    pushq %rax
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    movl %edi, %ebx
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shrl %cl, %ebx
+; X64-BMI1-NEXT:    negb %dl
+; X64-BMI1-NEXT:    movl $-1, %ebp
+; X64-BMI1-NEXT:    movl %edx, %ecx
+; X64-BMI1-NEXT:    shrl %cl, %ebp
+; X64-BMI1-NEXT:    movl %ebp, %edi
+; X64-BMI1-NEXT:    callq use32@PLT
+; X64-BMI1-NEXT:    andl %ebx, %ebp
+; X64-BMI1-NEXT:    movl %ebp, %eax
+; X64-BMI1-NEXT:    addq $8, %rsp
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    popq %rbp
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr32_c4_commutative:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %rbp
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    pushq %rax
+; X64-BMI2-NEXT:    movl %edx, %ebx
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %ebp
+; X64-BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI2-NEXT:    negb %al
+; X64-BMI2-NEXT:    movl $-1, %ecx
+; X64-BMI2-NEXT:    shrxl %eax, %ecx, %edi
+; X64-BMI2-NEXT:    callq use32@PLT
+; X64-BMI2-NEXT:    bzhil %ebx, %ebp, %eax
+; X64-BMI2-NEXT:    addq $8, %rsp
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    popq %rbp
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i32 %val, %numskipbits
   %numhighbits = sub i32 32, %numlowbits
   %mask = lshr i32 -1, %numhighbits
@@ -4725,57 +4725,57 @@ define i32 @bextr32_c5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits
 ; X86-NOBMI-NEXT:    popl %ebx
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr32_c5_skipextrauses:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    subl $16, %esp
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %ecx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
-; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
-; X86-BMI1NOTBM-NEXT:    movl %esi, (%esp)
-; X86-BMI1NOTBM-NEXT:    calll use32@PLT
-; X86-BMI1NOTBM-NEXT:    andl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    movl %ebx, (%esp)
-; X86-BMI1NOTBM-NEXT:    calll use32@PLT
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    addl $16, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr32_c5_skipextrauses:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    subl $16, %esp
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1BMI2-NEXT:    shrxl %edi, {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X86-BMI1BMI2-NEXT:    negb %al
-; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
-; X86-BMI1BMI2-NEXT:    calll use32@PLT
-; X86-BMI1BMI2-NEXT:    bzhil %ebx, %esi, %esi
-; X86-BMI1BMI2-NEXT:    movl %edi, (%esp)
-; X86-BMI1BMI2-NEXT:    calll use32@PLT
-; X86-BMI1BMI2-NEXT:    movl %esi, %eax
-; X86-BMI1BMI2-NEXT:    addl $16, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr32_c5_skipextrauses:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    subl $16, %esp
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-BMI1-NEXT:    movl %ebx, %ecx
+; X86-BMI1-NEXT:    shrl %cl, %edi
+; X86-BMI1-NEXT:    xorl %ecx, %ecx
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %esi
+; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI1-NEXT:    shrl %cl, %esi
+; X86-BMI1-NEXT:    movl %esi, (%esp)
+; X86-BMI1-NEXT:    calll use32@PLT
+; X86-BMI1-NEXT:    andl %edi, %esi
+; X86-BMI1-NEXT:    movl %ebx, (%esp)
+; X86-BMI1-NEXT:    calll use32@PLT
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    addl $16, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr32_c5_skipextrauses:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    subl $16, %esp
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI2-NEXT:    shrxl %edi, {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl %ebx, %eax
+; X86-BMI2-NEXT:    negb %al
+; X86-BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    movl %eax, (%esp)
+; X86-BMI2-NEXT:    calll use32@PLT
+; X86-BMI2-NEXT:    bzhil %ebx, %esi, %esi
+; X86-BMI2-NEXT:    movl %edi, (%esp)
+; X86-BMI2-NEXT:    calll use32@PLT
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    addl $16, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_c5_skipextrauses:
 ; X64-NOBMI:       # %bb.0:
@@ -4801,51 +4801,51 @@ define i32 @bextr32_c5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits
 ; X64-NOBMI-NEXT:    popq %rbp
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr32_c5_skipextrauses:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %rbp
-; X64-BMI1NOTBM-NEXT:    pushq %r14
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    movl %esi, %r14d
-; X64-BMI1NOTBM-NEXT:    movl %edi, %ebp
-; X64-BMI1NOTBM-NEXT:    movl %r14d, %ecx
-; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebp
-; X64-BMI1NOTBM-NEXT:    negb %dl
-; X64-BMI1NOTBM-NEXT:    movl $-1, %ebx
-; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
-; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebx
-; X64-BMI1NOTBM-NEXT:    movl %ebx, %edi
-; X64-BMI1NOTBM-NEXT:    callq use32@PLT
-; X64-BMI1NOTBM-NEXT:    andl %ebp, %ebx
-; X64-BMI1NOTBM-NEXT:    movl %r14d, %edi
-; X64-BMI1NOTBM-NEXT:    callq use32@PLT
-; X64-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    popq %r14
-; X64-BMI1NOTBM-NEXT:    popq %rbp
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr32_c5_skipextrauses:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %rbp
-; X64-BMI1BMI2-NEXT:    pushq %r14
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    movl %edx, %ebx
-; X64-BMI1BMI2-NEXT:    movl %esi, %ebp
-; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %r14d
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
-; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %edi
-; X64-BMI1BMI2-NEXT:    callq use32@PLT
-; X64-BMI1BMI2-NEXT:    bzhil %ebx, %r14d, %ebx
-; X64-BMI1BMI2-NEXT:    movl %ebp, %edi
-; X64-BMI1BMI2-NEXT:    callq use32@PLT
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    popq %r14
-; X64-BMI1BMI2-NEXT:    popq %rbp
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr32_c5_skipextrauses:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %rbp
+; X64-BMI1-NEXT:    pushq %r14
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    movl %esi, %r14d
+; X64-BMI1-NEXT:    movl %edi, %ebp
+; X64-BMI1-NEXT:    movl %r14d, %ecx
+; X64-BMI1-NEXT:    shrl %cl, %ebp
+; X64-BMI1-NEXT:    negb %dl
+; X64-BMI1-NEXT:    movl $-1, %ebx
+; X64-BMI1-NEXT:    movl %edx, %ecx
+; X64-BMI1-NEXT:    shrl %cl, %ebx
+; X64-BMI1-NEXT:    movl %ebx, %edi
+; X64-BMI1-NEXT:    callq use32@PLT
+; X64-BMI1-NEXT:    andl %ebp, %ebx
+; X64-BMI1-NEXT:    movl %r14d, %edi
+; X64-BMI1-NEXT:    callq use32@PLT
+; X64-BMI1-NEXT:    movl %ebx, %eax
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    popq %r14
+; X64-BMI1-NEXT:    popq %rbp
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr32_c5_skipextrauses:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %rbp
+; X64-BMI2-NEXT:    pushq %r14
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    movl %edx, %ebx
+; X64-BMI2-NEXT:    movl %esi, %ebp
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %r14d
+; X64-BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI2-NEXT:    negb %al
+; X64-BMI2-NEXT:    movl $-1, %ecx
+; X64-BMI2-NEXT:    shrxl %eax, %ecx, %edi
+; X64-BMI2-NEXT:    callq use32@PLT
+; X64-BMI2-NEXT:    bzhil %ebx, %r14d, %ebx
+; X64-BMI2-NEXT:    movl %ebp, %edi
+; X64-BMI2-NEXT:    callq use32@PLT
+; X64-BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    popq %r14
+; X64-BMI2-NEXT:    popq %rbp
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i32 %val, %numskipbits
   %numhighbits = sub i32 32, %numlowbits
   %mask = lshr i32 -1, %numhighbits
@@ -4904,95 +4904,95 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %ebp
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_c0:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebp
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    subl $12, %esp
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB41_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB41_2:
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
-; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB41_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %ebp
-; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB41_4:
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %ebp
-; X86-BMI1NOTBM-NEXT:    calll use64@PLT
-; X86-BMI1NOTBM-NEXT:    addl $16, %esp
-; X86-BMI1NOTBM-NEXT:    andl %ebp, %esi
-; X86-BMI1NOTBM-NEXT:    andl %ebx, %edi
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    addl $12, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    popl %ebp
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_c0:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebp
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    subl $12, %esp
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB41_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB41_2:
-; X86-BMI1BMI2-NEXT:    movb $64, %al
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movl $-1, %ebp
-; X86-BMI1BMI2-NEXT:    shrxl %eax, %ebp, %ebx
-; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    je .LBB41_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %ebx, %ebp
-; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
-; X86-BMI1BMI2-NEXT:  .LBB41_4:
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %ebp
-; X86-BMI1BMI2-NEXT:    calll use64@PLT
-; X86-BMI1BMI2-NEXT:    addl $16, %esp
-; X86-BMI1BMI2-NEXT:    andl %ebp, %esi
-; X86-BMI1BMI2-NEXT:    andl %ebx, %edi
-; X86-BMI1BMI2-NEXT:    movl %esi, %eax
-; X86-BMI1BMI2-NEXT:    movl %edi, %edx
-; X86-BMI1BMI2-NEXT:    addl $12, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    popl %ebp
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_c0:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebp
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    subl $12, %esp
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl %eax, %edi
+; X86-BMI1-NEXT:    shrl %cl, %edi
+; X86-BMI1-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB41_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %edi, %esi
+; X86-BMI1-NEXT:    xorl %edi, %edi
+; X86-BMI1-NEXT:  .LBB41_2:
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %ebp
+; X86-BMI1-NEXT:    movl $-1, %ebx
+; X86-BMI1-NEXT:    shrl %cl, %ebx
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB41_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %ebx, %ebp
+; X86-BMI1-NEXT:    xorl %ebx, %ebx
+; X86-BMI1-NEXT:  .LBB41_4:
+; X86-BMI1-NEXT:    subl $8, %esp
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %ebp
+; X86-BMI1-NEXT:    calll use64@PLT
+; X86-BMI1-NEXT:    addl $16, %esp
+; X86-BMI1-NEXT:    andl %ebp, %esi
+; X86-BMI1-NEXT:    andl %ebx, %edi
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    addl $12, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    popl %ebp
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_c0:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebp
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    subl $12, %esp
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI2-NEXT:    shrxl %ecx, %eax, %edi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB41_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edi, %esi
+; X86-BMI2-NEXT:    xorl %edi, %edi
+; X86-BMI2-NEXT:  .LBB41_2:
+; X86-BMI2-NEXT:    movb $64, %al
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl $-1, %ebp
+; X86-BMI2-NEXT:    shrxl %eax, %ebp, %ebx
+; X86-BMI2-NEXT:    testb $32, %al
+; X86-BMI2-NEXT:    je .LBB41_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %ebx, %ebp
+; X86-BMI2-NEXT:    xorl %ebx, %ebx
+; X86-BMI2-NEXT:  .LBB41_4:
+; X86-BMI2-NEXT:    subl $8, %esp
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %ebp
+; X86-BMI2-NEXT:    calll use64@PLT
+; X86-BMI2-NEXT:    addl $16, %esp
+; X86-BMI2-NEXT:    andl %ebp, %esi
+; X86-BMI2-NEXT:    andl %ebx, %edi
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    movl %edi, %edx
+; X86-BMI2-NEXT:    addl $12, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    popl %ebp
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_c0:
 ; X64-NOBMI:       # %bb.0:
@@ -5016,45 +5016,45 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    popq %r14
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_c0:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %r14
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    pushq %rax
-; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
-; X64-BMI1NOTBM-NEXT:    movq %rdi, %r14
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %r14
-; X64-BMI1NOTBM-NEXT:    negb %dl
-; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
-; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rdi
-; X64-BMI1NOTBM-NEXT:    callq use64@PLT
-; X64-BMI1NOTBM-NEXT:    andq %r14, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
-; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    popq %r14
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_c0:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %r14
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    pushq %rax
-; X64-BMI1BMI2-NEXT:    movq %rdx, %rbx
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %r14
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
-; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
-; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
-; X64-BMI1BMI2-NEXT:    callq use64@PLT
-; X64-BMI1BMI2-NEXT:    bzhiq %rbx, %r14, %rax
-; X64-BMI1BMI2-NEXT:    addq $8, %rsp
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    popq %r14
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_c0:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %r14
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    pushq %rax
+; X64-BMI1-NEXT:    movq %rsi, %rcx
+; X64-BMI1-NEXT:    movq %rdi, %r14
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1-NEXT:    shrq %cl, %r14
+; X64-BMI1-NEXT:    negb %dl
+; X64-BMI1-NEXT:    movq $-1, %rbx
+; X64-BMI1-NEXT:    movl %edx, %ecx
+; X64-BMI1-NEXT:    shrq %cl, %rbx
+; X64-BMI1-NEXT:    movq %rbx, %rdi
+; X64-BMI1-NEXT:    callq use64@PLT
+; X64-BMI1-NEXT:    andq %r14, %rbx
+; X64-BMI1-NEXT:    movq %rbx, %rax
+; X64-BMI1-NEXT:    addq $8, %rsp
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    popq %r14
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_c0:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %r14
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    pushq %rax
+; X64-BMI2-NEXT:    movq %rdx, %rbx
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %r14
+; X64-BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI2-NEXT:    negb %al
+; X64-BMI2-NEXT:    movq $-1, %rcx
+; X64-BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI2-NEXT:    callq use64@PLT
+; X64-BMI2-NEXT:    bzhiq %rbx, %r14, %rax
+; X64-BMI2-NEXT:    addq $8, %rsp
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    popq %r14
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %numhighbits = sub i64 64, %numlowbits
   %mask = lshr i64 -1, %numhighbits
@@ -5110,95 +5110,95 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-NOBMI-NEXT:    popl %ebp
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_c1_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebp
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    subl $12, %esp
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB42_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB42_2:
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
-; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB42_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %ebp
-; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB42_4:
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %ebp
-; X86-BMI1NOTBM-NEXT:    calll use64@PLT
-; X86-BMI1NOTBM-NEXT:    addl $16, %esp
-; X86-BMI1NOTBM-NEXT:    andl %ebp, %esi
-; X86-BMI1NOTBM-NEXT:    andl %ebx, %edi
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    addl $12, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    popl %ebp
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_c1_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebp
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    subl $12, %esp
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB42_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB42_2:
-; X86-BMI1BMI2-NEXT:    movb $64, %al
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movl $-1, %ebp
-; X86-BMI1BMI2-NEXT:    shrxl %eax, %ebp, %ebx
-; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    je .LBB42_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %ebx, %ebp
-; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
-; X86-BMI1BMI2-NEXT:  .LBB42_4:
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %ebp
-; X86-BMI1BMI2-NEXT:    calll use64@PLT
-; X86-BMI1BMI2-NEXT:    addl $16, %esp
-; X86-BMI1BMI2-NEXT:    andl %ebp, %esi
-; X86-BMI1BMI2-NEXT:    andl %ebx, %edi
-; X86-BMI1BMI2-NEXT:    movl %esi, %eax
-; X86-BMI1BMI2-NEXT:    movl %edi, %edx
-; X86-BMI1BMI2-NEXT:    addl $12, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    popl %ebp
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_c1_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebp
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    subl $12, %esp
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl %eax, %edi
+; X86-BMI1-NEXT:    shrl %cl, %edi
+; X86-BMI1-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB42_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %edi, %esi
+; X86-BMI1-NEXT:    xorl %edi, %edi
+; X86-BMI1-NEXT:  .LBB42_2:
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %ebp
+; X86-BMI1-NEXT:    movl $-1, %ebx
+; X86-BMI1-NEXT:    shrl %cl, %ebx
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB42_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %ebx, %ebp
+; X86-BMI1-NEXT:    xorl %ebx, %ebx
+; X86-BMI1-NEXT:  .LBB42_4:
+; X86-BMI1-NEXT:    subl $8, %esp
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %ebp
+; X86-BMI1-NEXT:    calll use64@PLT
+; X86-BMI1-NEXT:    addl $16, %esp
+; X86-BMI1-NEXT:    andl %ebp, %esi
+; X86-BMI1-NEXT:    andl %ebx, %edi
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    addl $12, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    popl %ebp
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_c1_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebp
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    subl $12, %esp
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI2-NEXT:    shrxl %ecx, %eax, %edi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB42_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edi, %esi
+; X86-BMI2-NEXT:    xorl %edi, %edi
+; X86-BMI2-NEXT:  .LBB42_2:
+; X86-BMI2-NEXT:    movb $64, %al
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl $-1, %ebp
+; X86-BMI2-NEXT:    shrxl %eax, %ebp, %ebx
+; X86-BMI2-NEXT:    testb $32, %al
+; X86-BMI2-NEXT:    je .LBB42_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %ebx, %ebp
+; X86-BMI2-NEXT:    xorl %ebx, %ebx
+; X86-BMI2-NEXT:  .LBB42_4:
+; X86-BMI2-NEXT:    subl $8, %esp
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %ebp
+; X86-BMI2-NEXT:    calll use64@PLT
+; X86-BMI2-NEXT:    addl $16, %esp
+; X86-BMI2-NEXT:    andl %ebp, %esi
+; X86-BMI2-NEXT:    andl %ebx, %edi
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    movl %edi, %edx
+; X86-BMI2-NEXT:    addl $12, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    popl %ebp
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_c1_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -5222,46 +5222,46 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X64-NOBMI-NEXT:    popq %r14
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_c1_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %r14
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    pushq %rax
-; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
-; X64-BMI1NOTBM-NEXT:    movq %rdi, %r14
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %r14
-; X64-BMI1NOTBM-NEXT:    negb %dl
-; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
-; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rdi
-; X64-BMI1NOTBM-NEXT:    callq use64@PLT
-; X64-BMI1NOTBM-NEXT:    andq %r14, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
-; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    popq %r14
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_c1_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %r14
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    pushq %rax
-; X64-BMI1BMI2-NEXT:    movl %edx, %ebx
-; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %r14
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
-; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
-; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
-; X64-BMI1BMI2-NEXT:    callq use64@PLT
-; X64-BMI1BMI2-NEXT:    bzhiq %rbx, %r14, %rax
-; X64-BMI1BMI2-NEXT:    addq $8, %rsp
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    popq %r14
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_c1_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %r14
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    pushq %rax
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    movq %rdi, %r14
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shrq %cl, %r14
+; X64-BMI1-NEXT:    negb %dl
+; X64-BMI1-NEXT:    movq $-1, %rbx
+; X64-BMI1-NEXT:    movl %edx, %ecx
+; X64-BMI1-NEXT:    shrq %cl, %rbx
+; X64-BMI1-NEXT:    movq %rbx, %rdi
+; X64-BMI1-NEXT:    callq use64@PLT
+; X64-BMI1-NEXT:    andq %r14, %rbx
+; X64-BMI1-NEXT:    movq %rbx, %rax
+; X64-BMI1-NEXT:    addq $8, %rsp
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    popq %r14
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_c1_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %r14
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    pushq %rax
+; X64-BMI2-NEXT:    movl %edx, %ebx
+; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %r14
+; X64-BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI2-NEXT:    negb %al
+; X64-BMI2-NEXT:    movq $-1, %rcx
+; X64-BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI2-NEXT:    callq use64@PLT
+; X64-BMI2-NEXT:    bzhiq %rbx, %r14, %rax
+; X64-BMI2-NEXT:    addq $8, %rsp
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    popq %r14
+; X64-BMI2-NEXT:    retq
   %skip = zext i8 %numskipbits to i64
   %shifted = lshr i64 %val, %skip
   %numhighbits = sub i8 64, %numlowbits
@@ -5320,97 +5320,97 @@ define i64 @bextr64_c2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    popl %ebp
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_c2_load:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebp
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    subl $12, %esp
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl (%eax), %esi
-; X86-BMI1NOTBM-NEXT:    movl 4(%eax), %eax
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB43_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB43_2:
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
-; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB43_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %ebp
-; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB43_4:
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %ebp
-; X86-BMI1NOTBM-NEXT:    calll use64@PLT
-; X86-BMI1NOTBM-NEXT:    addl $16, %esp
-; X86-BMI1NOTBM-NEXT:    andl %ebp, %esi
-; X86-BMI1NOTBM-NEXT:    andl %ebx, %edi
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    addl $12, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    popl %ebp
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_c2_load:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebp
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    subl $12, %esp
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movl (%eax), %esi
-; X86-BMI1BMI2-NEXT:    movl 4(%eax), %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edi
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB43_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB43_2:
-; X86-BMI1BMI2-NEXT:    movb $64, %al
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movl $-1, %ebp
-; X86-BMI1BMI2-NEXT:    shrxl %eax, %ebp, %ebx
-; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    je .LBB43_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %ebx, %ebp
-; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
-; X86-BMI1BMI2-NEXT:  .LBB43_4:
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %ebp
-; X86-BMI1BMI2-NEXT:    calll use64@PLT
-; X86-BMI1BMI2-NEXT:    addl $16, %esp
-; X86-BMI1BMI2-NEXT:    andl %ebp, %esi
-; X86-BMI1BMI2-NEXT:    andl %ebx, %edi
-; X86-BMI1BMI2-NEXT:    movl %esi, %eax
-; X86-BMI1BMI2-NEXT:    movl %edi, %edx
-; X86-BMI1BMI2-NEXT:    addl $12, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    popl %ebp
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_c2_load:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebp
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    subl $12, %esp
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl (%eax), %esi
+; X86-BMI1-NEXT:    movl 4(%eax), %eax
+; X86-BMI1-NEXT:    movl %eax, %edi
+; X86-BMI1-NEXT:    shrl %cl, %edi
+; X86-BMI1-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB43_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %edi, %esi
+; X86-BMI1-NEXT:    xorl %edi, %edi
+; X86-BMI1-NEXT:  .LBB43_2:
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %ebp
+; X86-BMI1-NEXT:    movl $-1, %ebx
+; X86-BMI1-NEXT:    shrl %cl, %ebx
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB43_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %ebx, %ebp
+; X86-BMI1-NEXT:    xorl %ebx, %ebx
+; X86-BMI1-NEXT:  .LBB43_4:
+; X86-BMI1-NEXT:    subl $8, %esp
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %ebp
+; X86-BMI1-NEXT:    calll use64@PLT
+; X86-BMI1-NEXT:    addl $16, %esp
+; X86-BMI1-NEXT:    andl %ebp, %esi
+; X86-BMI1-NEXT:    andl %ebx, %edi
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    addl $12, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    popl %ebp
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_c2_load:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebp
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    subl $12, %esp
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl (%eax), %esi
+; X86-BMI2-NEXT:    movl 4(%eax), %eax
+; X86-BMI2-NEXT:    shrxl %ecx, %eax, %edi
+; X86-BMI2-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB43_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edi, %esi
+; X86-BMI2-NEXT:    xorl %edi, %edi
+; X86-BMI2-NEXT:  .LBB43_2:
+; X86-BMI2-NEXT:    movb $64, %al
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl $-1, %ebp
+; X86-BMI2-NEXT:    shrxl %eax, %ebp, %ebx
+; X86-BMI2-NEXT:    testb $32, %al
+; X86-BMI2-NEXT:    je .LBB43_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %ebx, %ebp
+; X86-BMI2-NEXT:    xorl %ebx, %ebx
+; X86-BMI2-NEXT:  .LBB43_4:
+; X86-BMI2-NEXT:    subl $8, %esp
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %ebp
+; X86-BMI2-NEXT:    calll use64@PLT
+; X86-BMI2-NEXT:    addl $16, %esp
+; X86-BMI2-NEXT:    andl %ebp, %esi
+; X86-BMI2-NEXT:    andl %ebx, %edi
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    movl %edi, %edx
+; X86-BMI2-NEXT:    addl $12, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    popl %ebp
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_c2_load:
 ; X64-NOBMI:       # %bb.0:
@@ -5434,45 +5434,45 @@ define i64 @bextr64_c2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X64-NOBMI-NEXT:    popq %r14
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_c2_load:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %r14
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    pushq %rax
-; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
-; X64-BMI1NOTBM-NEXT:    movq (%rdi), %r14
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %r14
-; X64-BMI1NOTBM-NEXT:    negb %dl
-; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
-; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rdi
-; X64-BMI1NOTBM-NEXT:    callq use64@PLT
-; X64-BMI1NOTBM-NEXT:    andq %r14, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
-; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    popq %r14
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_c2_load:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %r14
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    pushq %rax
-; X64-BMI1BMI2-NEXT:    movq %rdx, %rbx
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, (%rdi), %r14
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
-; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
-; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
-; X64-BMI1BMI2-NEXT:    callq use64@PLT
-; X64-BMI1BMI2-NEXT:    bzhiq %rbx, %r14, %rax
-; X64-BMI1BMI2-NEXT:    addq $8, %rsp
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    popq %r14
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_c2_load:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %r14
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    pushq %rax
+; X64-BMI1-NEXT:    movq %rsi, %rcx
+; X64-BMI1-NEXT:    movq (%rdi), %r14
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1-NEXT:    shrq %cl, %r14
+; X64-BMI1-NEXT:    negb %dl
+; X64-BMI1-NEXT:    movq $-1, %rbx
+; X64-BMI1-NEXT:    movl %edx, %ecx
+; X64-BMI1-NEXT:    shrq %cl, %rbx
+; X64-BMI1-NEXT:    movq %rbx, %rdi
+; X64-BMI1-NEXT:    callq use64@PLT
+; X64-BMI1-NEXT:    andq %r14, %rbx
+; X64-BMI1-NEXT:    movq %rbx, %rax
+; X64-BMI1-NEXT:    addq $8, %rsp
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    popq %r14
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_c2_load:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %r14
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    pushq %rax
+; X64-BMI2-NEXT:    movq %rdx, %rbx
+; X64-BMI2-NEXT:    shrxq %rsi, (%rdi), %r14
+; X64-BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI2-NEXT:    negb %al
+; X64-BMI2-NEXT:    movq $-1, %rcx
+; X64-BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI2-NEXT:    callq use64@PLT
+; X64-BMI2-NEXT:    bzhiq %rbx, %r14, %rax
+; X64-BMI2-NEXT:    addq $8, %rsp
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    popq %r14
+; X64-BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %shifted = lshr i64 %val, %numskipbits
   %numhighbits = sub i64 64, %numlowbits
@@ -5530,97 +5530,97 @@ define i64 @bextr64_c3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
 ; X86-NOBMI-NEXT:    popl %ebp
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_c3_load_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebp
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    subl $12, %esp
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl (%eax), %esi
-; X86-BMI1NOTBM-NEXT:    movl 4(%eax), %eax
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB44_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB44_2:
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
-; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB44_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %ebp
-; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB44_4:
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %ebp
-; X86-BMI1NOTBM-NEXT:    calll use64@PLT
-; X86-BMI1NOTBM-NEXT:    addl $16, %esp
-; X86-BMI1NOTBM-NEXT:    andl %ebp, %esi
-; X86-BMI1NOTBM-NEXT:    andl %ebx, %edi
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    addl $12, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    popl %ebp
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_c3_load_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebp
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    subl $12, %esp
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movl (%eax), %esi
-; X86-BMI1BMI2-NEXT:    movl 4(%eax), %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edi
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB44_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB44_2:
-; X86-BMI1BMI2-NEXT:    movb $64, %al
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movl $-1, %ebp
-; X86-BMI1BMI2-NEXT:    shrxl %eax, %ebp, %ebx
-; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    je .LBB44_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %ebx, %ebp
-; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
-; X86-BMI1BMI2-NEXT:  .LBB44_4:
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %ebp
-; X86-BMI1BMI2-NEXT:    calll use64@PLT
-; X86-BMI1BMI2-NEXT:    addl $16, %esp
-; X86-BMI1BMI2-NEXT:    andl %ebp, %esi
-; X86-BMI1BMI2-NEXT:    andl %ebx, %edi
-; X86-BMI1BMI2-NEXT:    movl %esi, %eax
-; X86-BMI1BMI2-NEXT:    movl %edi, %edx
-; X86-BMI1BMI2-NEXT:    addl $12, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    popl %ebp
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_c3_load_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebp
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    subl $12, %esp
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl (%eax), %esi
+; X86-BMI1-NEXT:    movl 4(%eax), %eax
+; X86-BMI1-NEXT:    movl %eax, %edi
+; X86-BMI1-NEXT:    shrl %cl, %edi
+; X86-BMI1-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB44_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %edi, %esi
+; X86-BMI1-NEXT:    xorl %edi, %edi
+; X86-BMI1-NEXT:  .LBB44_2:
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %ebp
+; X86-BMI1-NEXT:    movl $-1, %ebx
+; X86-BMI1-NEXT:    shrl %cl, %ebx
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB44_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %ebx, %ebp
+; X86-BMI1-NEXT:    xorl %ebx, %ebx
+; X86-BMI1-NEXT:  .LBB44_4:
+; X86-BMI1-NEXT:    subl $8, %esp
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %ebp
+; X86-BMI1-NEXT:    calll use64@PLT
+; X86-BMI1-NEXT:    addl $16, %esp
+; X86-BMI1-NEXT:    andl %ebp, %esi
+; X86-BMI1-NEXT:    andl %ebx, %edi
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    addl $12, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    popl %ebp
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_c3_load_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebp
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    subl $12, %esp
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl (%eax), %esi
+; X86-BMI2-NEXT:    movl 4(%eax), %eax
+; X86-BMI2-NEXT:    shrxl %ecx, %eax, %edi
+; X86-BMI2-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB44_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edi, %esi
+; X86-BMI2-NEXT:    xorl %edi, %edi
+; X86-BMI2-NEXT:  .LBB44_2:
+; X86-BMI2-NEXT:    movb $64, %al
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl $-1, %ebp
+; X86-BMI2-NEXT:    shrxl %eax, %ebp, %ebx
+; X86-BMI2-NEXT:    testb $32, %al
+; X86-BMI2-NEXT:    je .LBB44_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %ebx, %ebp
+; X86-BMI2-NEXT:    xorl %ebx, %ebx
+; X86-BMI2-NEXT:  .LBB44_4:
+; X86-BMI2-NEXT:    subl $8, %esp
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %ebp
+; X86-BMI2-NEXT:    calll use64@PLT
+; X86-BMI2-NEXT:    addl $16, %esp
+; X86-BMI2-NEXT:    andl %ebp, %esi
+; X86-BMI2-NEXT:    andl %ebx, %edi
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    movl %edi, %edx
+; X86-BMI2-NEXT:    addl $12, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    popl %ebp
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_c3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -5644,46 +5644,46 @@ define i64 @bextr64_c3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
 ; X64-NOBMI-NEXT:    popq %r14
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_c3_load_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %r14
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    pushq %rax
-; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
-; X64-BMI1NOTBM-NEXT:    movq (%rdi), %r14
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %r14
-; X64-BMI1NOTBM-NEXT:    negb %dl
-; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
-; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rdi
-; X64-BMI1NOTBM-NEXT:    callq use64@PLT
-; X64-BMI1NOTBM-NEXT:    andq %r14, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
-; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    popq %r14
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_c3_load_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %r14
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    pushq %rax
-; X64-BMI1BMI2-NEXT:    movl %edx, %ebx
-; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, (%rdi), %r14
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
-; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
-; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
-; X64-BMI1BMI2-NEXT:    callq use64@PLT
-; X64-BMI1BMI2-NEXT:    bzhiq %rbx, %r14, %rax
-; X64-BMI1BMI2-NEXT:    addq $8, %rsp
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    popq %r14
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_c3_load_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %r14
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    pushq %rax
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    movq (%rdi), %r14
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shrq %cl, %r14
+; X64-BMI1-NEXT:    negb %dl
+; X64-BMI1-NEXT:    movq $-1, %rbx
+; X64-BMI1-NEXT:    movl %edx, %ecx
+; X64-BMI1-NEXT:    shrq %cl, %rbx
+; X64-BMI1-NEXT:    movq %rbx, %rdi
+; X64-BMI1-NEXT:    callq use64@PLT
+; X64-BMI1-NEXT:    andq %r14, %rbx
+; X64-BMI1-NEXT:    movq %rbx, %rax
+; X64-BMI1-NEXT:    addq $8, %rsp
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    popq %r14
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_c3_load_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %r14
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    pushq %rax
+; X64-BMI2-NEXT:    movl %edx, %ebx
+; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI2-NEXT:    shrxq %rsi, (%rdi), %r14
+; X64-BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI2-NEXT:    negb %al
+; X64-BMI2-NEXT:    movq $-1, %rcx
+; X64-BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI2-NEXT:    callq use64@PLT
+; X64-BMI2-NEXT:    bzhiq %rbx, %r14, %rax
+; X64-BMI2-NEXT:    addq $8, %rsp
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    popq %r14
+; X64-BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %skip = zext i8 %numskipbits to i64
   %shifted = lshr i64 %val, %skip
@@ -5742,95 +5742,95 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X86-NOBMI-NEXT:    popl %ebp
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_c4_commutative:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebp
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    subl $12, %esp
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB45_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB45_2:
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
-; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB45_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %ebp
-; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB45_4:
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %ebp
-; X86-BMI1NOTBM-NEXT:    calll use64@PLT
-; X86-BMI1NOTBM-NEXT:    addl $16, %esp
-; X86-BMI1NOTBM-NEXT:    andl %ebp, %esi
-; X86-BMI1NOTBM-NEXT:    andl %ebx, %edi
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    addl $12, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    popl %ebp
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_c4_commutative:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebp
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    subl $12, %esp
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB45_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB45_2:
-; X86-BMI1BMI2-NEXT:    movb $64, %al
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movl $-1, %ebp
-; X86-BMI1BMI2-NEXT:    shrxl %eax, %ebp, %ebx
-; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    je .LBB45_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %ebx, %ebp
-; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
-; X86-BMI1BMI2-NEXT:  .LBB45_4:
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %ebp
-; X86-BMI1BMI2-NEXT:    calll use64@PLT
-; X86-BMI1BMI2-NEXT:    addl $16, %esp
-; X86-BMI1BMI2-NEXT:    andl %ebp, %esi
-; X86-BMI1BMI2-NEXT:    andl %ebx, %edi
-; X86-BMI1BMI2-NEXT:    movl %esi, %eax
-; X86-BMI1BMI2-NEXT:    movl %edi, %edx
-; X86-BMI1BMI2-NEXT:    addl $12, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    popl %ebp
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_c4_commutative:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebp
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    subl $12, %esp
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl %eax, %edi
+; X86-BMI1-NEXT:    shrl %cl, %edi
+; X86-BMI1-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB45_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %edi, %esi
+; X86-BMI1-NEXT:    xorl %edi, %edi
+; X86-BMI1-NEXT:  .LBB45_2:
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %ebp
+; X86-BMI1-NEXT:    movl $-1, %ebx
+; X86-BMI1-NEXT:    shrl %cl, %ebx
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB45_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %ebx, %ebp
+; X86-BMI1-NEXT:    xorl %ebx, %ebx
+; X86-BMI1-NEXT:  .LBB45_4:
+; X86-BMI1-NEXT:    subl $8, %esp
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %ebp
+; X86-BMI1-NEXT:    calll use64@PLT
+; X86-BMI1-NEXT:    addl $16, %esp
+; X86-BMI1-NEXT:    andl %ebp, %esi
+; X86-BMI1-NEXT:    andl %ebx, %edi
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    addl $12, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    popl %ebp
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_c4_commutative:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebp
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    subl $12, %esp
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI2-NEXT:    shrxl %ecx, %eax, %edi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB45_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edi, %esi
+; X86-BMI2-NEXT:    xorl %edi, %edi
+; X86-BMI2-NEXT:  .LBB45_2:
+; X86-BMI2-NEXT:    movb $64, %al
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl $-1, %ebp
+; X86-BMI2-NEXT:    shrxl %eax, %ebp, %ebx
+; X86-BMI2-NEXT:    testb $32, %al
+; X86-BMI2-NEXT:    je .LBB45_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %ebx, %ebp
+; X86-BMI2-NEXT:    xorl %ebx, %ebx
+; X86-BMI2-NEXT:  .LBB45_4:
+; X86-BMI2-NEXT:    subl $8, %esp
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %ebp
+; X86-BMI2-NEXT:    calll use64@PLT
+; X86-BMI2-NEXT:    addl $16, %esp
+; X86-BMI2-NEXT:    andl %ebp, %esi
+; X86-BMI2-NEXT:    andl %ebx, %edi
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    movl %edi, %edx
+; X86-BMI2-NEXT:    addl $12, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    popl %ebp
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_c4_commutative:
 ; X64-NOBMI:       # %bb.0:
@@ -5854,45 +5854,45 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X64-NOBMI-NEXT:    popq %r14
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_c4_commutative:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %r14
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    pushq %rax
-; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
-; X64-BMI1NOTBM-NEXT:    movq %rdi, %r14
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %r14
-; X64-BMI1NOTBM-NEXT:    negb %dl
-; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
-; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rdi
-; X64-BMI1NOTBM-NEXT:    callq use64@PLT
-; X64-BMI1NOTBM-NEXT:    andq %r14, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
-; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    popq %r14
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_c4_commutative:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %r14
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    pushq %rax
-; X64-BMI1BMI2-NEXT:    movq %rdx, %rbx
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %r14
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
-; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
-; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
-; X64-BMI1BMI2-NEXT:    callq use64@PLT
-; X64-BMI1BMI2-NEXT:    bzhiq %rbx, %r14, %rax
-; X64-BMI1BMI2-NEXT:    addq $8, %rsp
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    popq %r14
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_c4_commutative:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %r14
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    pushq %rax
+; X64-BMI1-NEXT:    movq %rsi, %rcx
+; X64-BMI1-NEXT:    movq %rdi, %r14
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1-NEXT:    shrq %cl, %r14
+; X64-BMI1-NEXT:    negb %dl
+; X64-BMI1-NEXT:    movq $-1, %rbx
+; X64-BMI1-NEXT:    movl %edx, %ecx
+; X64-BMI1-NEXT:    shrq %cl, %rbx
+; X64-BMI1-NEXT:    movq %rbx, %rdi
+; X64-BMI1-NEXT:    callq use64@PLT
+; X64-BMI1-NEXT:    andq %r14, %rbx
+; X64-BMI1-NEXT:    movq %rbx, %rax
+; X64-BMI1-NEXT:    addq $8, %rsp
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    popq %r14
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_c4_commutative:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %r14
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    pushq %rax
+; X64-BMI2-NEXT:    movq %rdx, %rbx
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %r14
+; X64-BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI2-NEXT:    negb %al
+; X64-BMI2-NEXT:    movq $-1, %rcx
+; X64-BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI2-NEXT:    callq use64@PLT
+; X64-BMI2-NEXT:    bzhiq %rbx, %r14, %rax
+; X64-BMI2-NEXT:    addq $8, %rsp
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    popq %r14
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %numhighbits = sub i64 64, %numlowbits
   %mask = lshr i64 -1, %numhighbits
@@ -5953,105 +5953,105 @@ define i64 @bextr64_c5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-NOBMI-NEXT:    popl %ebp
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_c5_skipextrauses:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebp
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    subl $12, %esp
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB46_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB46_2:
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
-; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebp
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB46_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %ebp, %ebx
-; X86-BMI1NOTBM-NEXT:    xorl %ebp, %ebp
-; X86-BMI1NOTBM-NEXT:  .LBB46_4:
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    pushl %ebp
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    calll use64@PLT
-; X86-BMI1NOTBM-NEXT:    addl $16, %esp
-; X86-BMI1NOTBM-NEXT:    andl %ebx, %esi
-; X86-BMI1NOTBM-NEXT:    andl %ebp, %edi
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-BMI1NOTBM-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-BMI1NOTBM-NEXT:    calll use64@PLT
-; X86-BMI1NOTBM-NEXT:    addl $16, %esp
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    addl $12, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    popl %ebp
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_c5_skipextrauses:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebp
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    subl $12, %esp
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB46_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
-; X86-BMI1BMI2-NEXT:  .LBB46_2:
-; X86-BMI1BMI2-NEXT:    movb $64, %al
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movl $-1, %ebp
-; X86-BMI1BMI2-NEXT:    shrxl %eax, %ebp, %ebx
-; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    je .LBB46_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %ebx, %ebp
-; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
-; X86-BMI1BMI2-NEXT:  .LBB46_4:
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %ebp
-; X86-BMI1BMI2-NEXT:    calll use64@PLT
-; X86-BMI1BMI2-NEXT:    addl $16, %esp
-; X86-BMI1BMI2-NEXT:    andl %ebp, %esi
-; X86-BMI1BMI2-NEXT:    andl %ebx, %edi
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-BMI1BMI2-NEXT:    pushl {{[0-9]+}}(%esp)
-; X86-BMI1BMI2-NEXT:    calll use64@PLT
-; X86-BMI1BMI2-NEXT:    addl $16, %esp
-; X86-BMI1BMI2-NEXT:    movl %esi, %eax
-; X86-BMI1BMI2-NEXT:    movl %edi, %edx
-; X86-BMI1BMI2-NEXT:    addl $12, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    popl %ebp
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_c5_skipextrauses:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebp
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    subl $12, %esp
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    movl %eax, %edi
+; X86-BMI1-NEXT:    shrl %cl, %edi
+; X86-BMI1-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB46_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %edi, %esi
+; X86-BMI1-NEXT:    xorl %edi, %edi
+; X86-BMI1-NEXT:  .LBB46_2:
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %ebx
+; X86-BMI1-NEXT:    movl $-1, %ebp
+; X86-BMI1-NEXT:    shrl %cl, %ebp
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB46_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %ebp, %ebx
+; X86-BMI1-NEXT:    xorl %ebp, %ebp
+; X86-BMI1-NEXT:  .LBB46_4:
+; X86-BMI1-NEXT:    subl $8, %esp
+; X86-BMI1-NEXT:    pushl %ebp
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    calll use64@PLT
+; X86-BMI1-NEXT:    addl $16, %esp
+; X86-BMI1-NEXT:    andl %ebx, %esi
+; X86-BMI1-NEXT:    andl %ebp, %edi
+; X86-BMI1-NEXT:    subl $8, %esp
+; X86-BMI1-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT:    calll use64@PLT
+; X86-BMI1-NEXT:    addl $16, %esp
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    addl $12, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    popl %ebp
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_c5_skipextrauses:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebp
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    subl $12, %esp
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI2-NEXT:    shrxl %ecx, %eax, %edi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB46_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edi, %esi
+; X86-BMI2-NEXT:    xorl %edi, %edi
+; X86-BMI2-NEXT:  .LBB46_2:
+; X86-BMI2-NEXT:    movb $64, %al
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl $-1, %ebp
+; X86-BMI2-NEXT:    shrxl %eax, %ebp, %ebx
+; X86-BMI2-NEXT:    testb $32, %al
+; X86-BMI2-NEXT:    je .LBB46_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %ebx, %ebp
+; X86-BMI2-NEXT:    xorl %ebx, %ebx
+; X86-BMI2-NEXT:  .LBB46_4:
+; X86-BMI2-NEXT:    subl $8, %esp
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %ebp
+; X86-BMI2-NEXT:    calll use64@PLT
+; X86-BMI2-NEXT:    addl $16, %esp
+; X86-BMI2-NEXT:    andl %ebp, %esi
+; X86-BMI2-NEXT:    andl %ebx, %edi
+; X86-BMI2-NEXT:    subl $8, %esp
+; X86-BMI2-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    calll use64@PLT
+; X86-BMI2-NEXT:    addl $16, %esp
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    movl %edi, %edx
+; X86-BMI2-NEXT:    addl $12, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    popl %ebp
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_c5_skipextrauses:
 ; X64-NOBMI:       # %bb.0:
@@ -6077,51 +6077,51 @@ define i64 @bextr64_c5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X64-NOBMI-NEXT:    popq %r15
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_c5_skipextrauses:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %r15
-; X64-BMI1NOTBM-NEXT:    pushq %r14
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rsi, %r14
-; X64-BMI1NOTBM-NEXT:    movq %rdi, %r15
-; X64-BMI1NOTBM-NEXT:    movl %r14d, %ecx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %r15
-; X64-BMI1NOTBM-NEXT:    negb %dl
-; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
-; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rdi
-; X64-BMI1NOTBM-NEXT:    callq use64@PLT
-; X64-BMI1NOTBM-NEXT:    andq %r15, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %r14, %rdi
-; X64-BMI1NOTBM-NEXT:    callq use64@PLT
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    popq %r14
-; X64-BMI1NOTBM-NEXT:    popq %r15
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_c5_skipextrauses:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %r15
-; X64-BMI1BMI2-NEXT:    pushq %r14
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    movq %rdx, %rbx
-; X64-BMI1BMI2-NEXT:    movq %rsi, %r14
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %r15
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
-; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
-; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
-; X64-BMI1BMI2-NEXT:    callq use64@PLT
-; X64-BMI1BMI2-NEXT:    bzhiq %rbx, %r15, %rbx
-; X64-BMI1BMI2-NEXT:    movq %r14, %rdi
-; X64-BMI1BMI2-NEXT:    callq use64@PLT
-; X64-BMI1BMI2-NEXT:    movq %rbx, %rax
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    popq %r14
-; X64-BMI1BMI2-NEXT:    popq %r15
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_c5_skipextrauses:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %r15
+; X64-BMI1-NEXT:    pushq %r14
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    movq %rsi, %r14
+; X64-BMI1-NEXT:    movq %rdi, %r15
+; X64-BMI1-NEXT:    movl %r14d, %ecx
+; X64-BMI1-NEXT:    shrq %cl, %r15
+; X64-BMI1-NEXT:    negb %dl
+; X64-BMI1-NEXT:    movq $-1, %rbx
+; X64-BMI1-NEXT:    movl %edx, %ecx
+; X64-BMI1-NEXT:    shrq %cl, %rbx
+; X64-BMI1-NEXT:    movq %rbx, %rdi
+; X64-BMI1-NEXT:    callq use64@PLT
+; X64-BMI1-NEXT:    andq %r15, %rbx
+; X64-BMI1-NEXT:    movq %r14, %rdi
+; X64-BMI1-NEXT:    callq use64@PLT
+; X64-BMI1-NEXT:    movq %rbx, %rax
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    popq %r14
+; X64-BMI1-NEXT:    popq %r15
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_c5_skipextrauses:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %r15
+; X64-BMI2-NEXT:    pushq %r14
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    movq %rdx, %rbx
+; X64-BMI2-NEXT:    movq %rsi, %r14
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %r15
+; X64-BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI2-NEXT:    negb %al
+; X64-BMI2-NEXT:    movq $-1, %rcx
+; X64-BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI2-NEXT:    callq use64@PLT
+; X64-BMI2-NEXT:    bzhiq %rbx, %r15, %rbx
+; X64-BMI2-NEXT:    movq %r14, %rdi
+; X64-BMI2-NEXT:    callq use64@PLT
+; X64-BMI2-NEXT:    movq %rbx, %rax
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    popq %r14
+; X64-BMI2-NEXT:    popq %r15
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %numhighbits = sub i64 64, %numlowbits
   %mask = lshr i64 -1, %numhighbits
@@ -6162,54 +6162,54 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_32_c0:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB47_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB47_2:
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB47_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB47_4:
-; X86-BMI1NOTBM-NEXT:    andl %edx, %eax
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_32_c0:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB47_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB47_2:
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl $-1, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB47_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB47_4:
-; X86-BMI1BMI2-NEXT:    andl %edx, %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_32_c0:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl %esi, %edx
+; X86-BMI1-NEXT:    shrl %cl, %edx
+; X86-BMI1-NEXT:    shrdl %cl, %esi, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB47_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:  .LBB47_2:
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %eax
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB47_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl $-1, %eax
+; X86-BMI1-NEXT:  .LBB47_4:
+; X86-BMI1-NEXT:    andl %edx, %eax
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_32_c0:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    shrdl %cl, %eax, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB47_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    shrxl %ecx, %eax, %edx
+; X86-BMI2-NEXT:  .LBB47_2:
+; X86-BMI2-NEXT:    movb $64, %cl
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl $-1, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB47_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
+; X86-BMI2-NEXT:  .LBB47_4:
+; X86-BMI2-NEXT:    andl %edx, %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_32_c0:
 ; X64-NOBMI:       # %bb.0:
@@ -6224,20 +6224,20 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_32_c0:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_32_c0:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_32_c0:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, %rdi, %rax
+; X64-BMI1-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_32_c0:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %numhighbits = sub i64 64, %numlowbits
   %mask = lshr i64 -1, %numhighbits
@@ -6270,44 +6270,44 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_32_c1:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB48_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB48_2:
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, %edx, %eax
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_32_c1:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB48_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edx
-; X86-BMI1BMI2-NEXT:  .LBB48_2:
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %edx, %eax
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_32_c1:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    shrl %cl, %edx
+; X86-BMI1-NEXT:    shrdl %cl, %edi, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB48_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %esi, %edx
+; X86-BMI1-NEXT:  .LBB48_2:
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    bextrl %eax, %edx, %eax
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_32_c1:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB48_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    shrxl %ecx, %esi, %edx
+; X86-BMI2-NEXT:  .LBB48_2:
+; X86-BMI2-NEXT:    bzhil %eax, %edx, %eax
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_32_c1:
 ; X64-NOBMI:       # %bb.0:
@@ -6322,20 +6322,20 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_32_c1:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_32_c1:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_32_c1:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, %rdi, %rax
+; X64-BMI1-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_32_c1:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %truncshifted = trunc i64 %shifted to i32
   %numhighbits = sub i32 32, %numlowbits
@@ -6369,44 +6369,44 @@ define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_32_c2:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB49_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB49_2:
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, %edx, %eax
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_32_c2:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB49_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edx
-; X86-BMI1BMI2-NEXT:  .LBB49_2:
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %edx, %eax
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_32_c2:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    shrl %cl, %edx
+; X86-BMI1-NEXT:    shrdl %cl, %edi, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB49_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %esi, %edx
+; X86-BMI1-NEXT:  .LBB49_2:
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    bextrl %eax, %edx, %eax
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_32_c2:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB49_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    shrxl %ecx, %esi, %edx
+; X86-BMI2-NEXT:  .LBB49_2:
+; X86-BMI2-NEXT:    bzhil %eax, %edx, %eax
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_32_c2:
 ; X64-NOBMI:       # %bb.0:
@@ -6421,20 +6421,20 @@ define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_32_c2:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_32_c2:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_32_c2:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, %rdi, %rax
+; X64-BMI1-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_32_c2:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %numhighbits = sub i32 32, %numlowbits
   %mask = lshr i32 -1, %numhighbits
@@ -6475,59 +6475,59 @@ define i32 @bextr64_32_c3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_32_c3:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB50_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB50_2:
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB50_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB50_4:
-; X86-BMI1NOTBM-NEXT:    andl %edx, %eax
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_32_c3:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB50_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB50_2:
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:    movl $-1, %esi
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB50_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %esi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB50_4:
-; X86-BMI1BMI2-NEXT:    andl %edx, %eax
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_32_c3:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl %esi, %edx
+; X86-BMI1-NEXT:    shrl %cl, %edx
+; X86-BMI1-NEXT:    shrdl %cl, %esi, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB50_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:  .LBB50_2:
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:    movl $-1, %esi
+; X86-BMI1-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB50_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:  .LBB50_4:
+; X86-BMI1-NEXT:    andl %edx, %eax
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_32_c3:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    shrdl %cl, %eax, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB50_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    shrxl %ecx, %eax, %edx
+; X86-BMI2-NEXT:  .LBB50_2:
+; X86-BMI2-NEXT:    movb $64, %cl
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:    movl $-1, %esi
+; X86-BMI2-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    jne .LBB50_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:  .LBB50_4:
+; X86-BMI2-NEXT:    andl %edx, %eax
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_32_c3:
 ; X64-NOBMI:       # %bb.0:
@@ -6542,28 +6542,28 @@ define i32 @bextr64_32_c3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_32_c3:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %rdi
-; X64-BMI1NOTBM-NEXT:    negb %dl
-; X64-BMI1NOTBM-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
-; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %rax
-; X64-BMI1NOTBM-NEXT:    andl %edi, %eax
-; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_32_c3:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rcx
-; X64-BMI1BMI2-NEXT:    negb %dl
-; X64-BMI1BMI2-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
-; X64-BMI1BMI2-NEXT:    shrxq %rdx, %rax, %rax
-; X64-BMI1BMI2-NEXT:    andl %ecx, %eax
-; X64-BMI1BMI2-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_32_c3:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movq %rsi, %rcx
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1-NEXT:    shrq %cl, %rdi
+; X64-BMI1-NEXT:    negb %dl
+; X64-BMI1-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-BMI1-NEXT:    movl %edx, %ecx
+; X64-BMI1-NEXT:    shrq %cl, %rax
+; X64-BMI1-NEXT:    andl %edi, %eax
+; X64-BMI1-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_32_c3:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rcx
+; X64-BMI2-NEXT:    negb %dl
+; X64-BMI2-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-BMI2-NEXT:    shrxq %rdx, %rax, %rax
+; X64-BMI2-NEXT:    andl %ecx, %eax
+; X64-BMI2-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %numhighbits = sub i64 64, %numlowbits
   %mask = lshr i64 4294967295, %numhighbits
@@ -6589,22 +6589,22 @@ define i32 @bextr32_d0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr32_d0:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    orl %eax, %ecx
-; X86-BMI1NOTBM-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr32_d0:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr32_d0:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    orl %eax, %ecx
+; X86-BMI1-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr32_d0:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_d0:
 ; X64-NOBMI:       # %bb.0:
@@ -6618,19 +6618,19 @@ define i32 @bextr32_d0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    shrl %cl, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr32_d0:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrl %eax, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr32_d0:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr32_d0:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrl %eax, %edi, %eax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr32_d0:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i32 %val, %numskipbits
   %numhighbits = sub i32 32, %numlowbits
   %highbitscleared = shl i32 %shifted, %numhighbits
@@ -6651,22 +6651,22 @@ define i32 @bextr32_d1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr32_d1_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    orl %eax, %ecx
-; X86-BMI1NOTBM-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr32_d1_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr32_d1_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    orl %eax, %ecx
+; X86-BMI1-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr32_d1_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_d1_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -6680,19 +6680,19 @@ define i32 @bextr32_d1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X64-NOBMI-NEXT:    shrl %cl, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr32_d1_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrl %eax, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr32_d1_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr32_d1_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrl %eax, %edi, %eax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr32_d1_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %skip = zext i8 %numskipbits to i32
   %shifted = lshr i32 %val, %skip
   %numhighbits = sub i8 32, %numlowbits
@@ -6716,24 +6716,24 @@ define i32 @bextr32_d2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr32_d2_load:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    shll $8, %ecx
-; X86-BMI1NOTBM-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    orl %ecx, %edx
-; X86-BMI1NOTBM-NEXT:    bextrl %edx, (%eax), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr32_d2_load:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
-; X86-BMI1BMI2-NEXT:    shrxl %edx, (%ecx), %ecx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr32_d2_load:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    shll $8, %ecx
+; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    orl %ecx, %edx
+; X86-BMI1-NEXT:    bextrl %edx, (%eax), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr32_d2_load:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT:    shrxl %edx, (%ecx), %ecx
+; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_d2_load:
 ; X64-NOBMI:       # %bb.0:
@@ -6747,19 +6747,19 @@ define i32 @bextr32_d2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind
 ; X64-NOBMI-NEXT:    shrl %cl, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr32_d2_load:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrl %eax, (%rdi), %eax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr32_d2_load:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxl %esi, (%rdi), %eax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr32_d2_load:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrl %eax, (%rdi), %eax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr32_d2_load:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxl %esi, (%rdi), %eax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %shifted = lshr i32 %val, %numskipbits
   %numhighbits = sub i32 32, %numlowbits
@@ -6782,24 +6782,24 @@ define i32 @bextr32_d3_load_indexzext(i32* %w, i8 %numskipbits, i8 %numlowbits)
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr32_d3_load_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    shll $8, %ecx
-; X86-BMI1NOTBM-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    orl %ecx, %edx
-; X86-BMI1NOTBM-NEXT:    bextrl %edx, (%eax), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr32_d3_load_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
-; X86-BMI1BMI2-NEXT:    shrxl %edx, (%ecx), %ecx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr32_d3_load_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    shll $8, %ecx
+; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    orl %ecx, %edx
+; X86-BMI1-NEXT:    bextrl %edx, (%eax), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr32_d3_load_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT:    shrxl %edx, (%ecx), %ecx
+; X86-BMI2-NEXT:    bzhil %eax, %ecx, %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_d3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -6813,19 +6813,19 @@ define i32 @bextr32_d3_load_indexzext(i32* %w, i8 %numskipbits, i8 %numlowbits)
 ; X64-NOBMI-NEXT:    shrl %cl, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr32_d3_load_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrl %eax, (%rdi), %eax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr32_d3_load_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxl %esi, (%rdi), %eax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr32_d3_load_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrl %eax, (%rdi), %eax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr32_d3_load_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxl %esi, (%rdi), %eax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %skip = zext i8 %numskipbits to i32
   %shifted = lshr i32 %val, %skip
@@ -6857,37 +6857,37 @@ define i32 @bextr32_d5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr32_d5_skipextrauses:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    shll $8, %ecx
-; X86-BMI1NOTBM-NEXT:    movzbl %al, %edx
-; X86-BMI1NOTBM-NEXT:    orl %ecx, %edx
-; X86-BMI1NOTBM-NEXT:    bextrl %edx, {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl %eax, (%esp)
-; X86-BMI1NOTBM-NEXT:    calll use32@PLT
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    addl $8, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr32_d5_skipextrauses:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %edx, %esi
-; X86-BMI1BMI2-NEXT:    movl %ecx, (%esp)
-; X86-BMI1BMI2-NEXT:    calll use32@PLT
-; X86-BMI1BMI2-NEXT:    movl %esi, %eax
-; X86-BMI1BMI2-NEXT:    addl $8, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr32_d5_skipextrauses:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    subl $8, %esp
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    shll $8, %ecx
+; X86-BMI1-NEXT:    movzbl %al, %edx
+; X86-BMI1-NEXT:    orl %ecx, %edx
+; X86-BMI1-NEXT:    bextrl %edx, {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl %eax, (%esp)
+; X86-BMI1-NEXT:    calll use32@PLT
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    addl $8, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr32_d5_skipextrauses:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    subl $8, %esp
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    bzhil %eax, %edx, %esi
+; X86-BMI2-NEXT:    movl %ecx, (%esp)
+; X86-BMI2-NEXT:    calll use32@PLT
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    addl $8, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_d5_skipextrauses:
 ; X64-NOBMI:       # %bb.0:
@@ -6905,29 +6905,29 @@ define i32 @bextr32_d5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits
 ; X64-NOBMI-NEXT:    popq %rbx
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr32_d5_skipextrauses:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrl %eax, %edi, %ebx
-; X64-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X64-BMI1NOTBM-NEXT:    callq use32@PLT
-; X64-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr32_d5_skipextrauses:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %ebx
-; X64-BMI1BMI2-NEXT:    movl %esi, %edi
-; X64-BMI1BMI2-NEXT:    callq use32@PLT
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr32_d5_skipextrauses:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrl %eax, %edi, %ebx
+; X64-BMI1-NEXT:    movl %esi, %edi
+; X64-BMI1-NEXT:    callq use32@PLT
+; X64-BMI1-NEXT:    movl %ebx, %eax
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr32_d5_skipextrauses:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %ebx
+; X64-BMI2-NEXT:    movl %esi, %edi
+; X64-BMI2-NEXT:    callq use32@PLT
+; X64-BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i32 %val, %numskipbits
   %numhighbits = sub i32 32, %numlowbits
   %highbitscleared = shl i32 %shifted, %numhighbits
@@ -6987,94 +6987,94 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %ebx
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_d0:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %edi
-; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB56_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB56_2:
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    shldl %cl, %edi, %eax
-; X86-BMI1NOTBM-NEXT:    shll %cl, %edi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    movl %edi, %ebx
-; X86-BMI1NOTBM-NEXT:    jne .LBB56_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB56_4:
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB56_6
-; X86-BMI1NOTBM-NEXT:  # %bb.5:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB56_6:
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebx, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB56_8
-; X86-BMI1NOTBM-NEXT:  # %bb.7:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB56_8:
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_d0:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %esi
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB56_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %esi, %eax
-; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB56_2:
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %esi
-; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB56_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB56_4:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB56_6
-; X86-BMI1BMI2-NEXT:  # %bb.5:
-; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB56_6:
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB56_8
-; X86-BMI1BMI2-NEXT:  # %bb.7:
-; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB56_8:
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_d0:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    movl %edx, %eax
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    shrdl %cl, %edx, %edi
+; X86-BMI1-NEXT:    xorl %esi, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB56_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %edi
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:  .LBB56_2:
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    shldl %cl, %edi, %eax
+; X86-BMI1-NEXT:    shll %cl, %edi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    movl %edi, %ebx
+; X86-BMI1-NEXT:    jne .LBB56_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %eax, %ebx
+; X86-BMI1-NEXT:  .LBB56_4:
+; X86-BMI1-NEXT:    movl %ebx, %eax
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    movl $0, %edx
+; X86-BMI1-NEXT:    jne .LBB56_6
+; X86-BMI1-NEXT:  # %bb.5:
+; X86-BMI1-NEXT:    movl %edi, %esi
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:  .LBB56_6:
+; X86-BMI1-NEXT:    shrdl %cl, %ebx, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB56_8
+; X86-BMI1-NEXT:  # %bb.7:
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:  .LBB56_8:
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_d0:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT:    shrxl %ecx, %edx, %esi
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB56_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    xorl %esi, %esi
+; X86-BMI2-NEXT:  .LBB56_2:
+; X86-BMI2-NEXT:    movb $64, %cl
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    shldl %cl, %eax, %esi
+; X86-BMI2-NEXT:    shlxl %ecx, %eax, %edi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB56_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %edi, %esi
+; X86-BMI2-NEXT:    movl $0, %edi
+; X86-BMI2-NEXT:  .LBB56_4:
+; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
+; X86-BMI2-NEXT:    jne .LBB56_6
+; X86-BMI2-NEXT:  # %bb.5:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:  .LBB56_6:
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    jne .LBB56_8
+; X86-BMI2-NEXT:  # %bb.7:
+; X86-BMI2-NEXT:    movl %edi, %eax
+; X86-BMI2-NEXT:  .LBB56_8:
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_d0:
 ; X64-NOBMI:       # %bb.0:
@@ -7088,19 +7088,19 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    shrq %cl, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_d0:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_d0:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_d0:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, %rdi, %rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_d0:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhiq %rdx, %rax, %rax
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %numhighbits = sub i64 64, %numlowbits
   %highbitscleared = shl i64 %shifted, %numhighbits
@@ -7157,94 +7157,94 @@ define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-NOBMI-NEXT:    popl %ebx
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_d1_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %edi
-; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB57_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB57_2:
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    shldl %cl, %edi, %eax
-; X86-BMI1NOTBM-NEXT:    shll %cl, %edi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    movl %edi, %ebx
-; X86-BMI1NOTBM-NEXT:    jne .LBB57_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB57_4:
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB57_6
-; X86-BMI1NOTBM-NEXT:  # %bb.5:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB57_6:
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebx, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB57_8
-; X86-BMI1NOTBM-NEXT:  # %bb.7:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB57_8:
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_d1_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %esi
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB57_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %esi, %eax
-; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB57_2:
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %esi
-; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB57_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB57_4:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB57_6
-; X86-BMI1BMI2-NEXT:  # %bb.5:
-; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB57_6:
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB57_8
-; X86-BMI1BMI2-NEXT:  # %bb.7:
-; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB57_8:
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_d1_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    movl %edx, %eax
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    shrdl %cl, %edx, %edi
+; X86-BMI1-NEXT:    xorl %esi, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB57_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %edi
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:  .LBB57_2:
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    shldl %cl, %edi, %eax
+; X86-BMI1-NEXT:    shll %cl, %edi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    movl %edi, %ebx
+; X86-BMI1-NEXT:    jne .LBB57_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %eax, %ebx
+; X86-BMI1-NEXT:  .LBB57_4:
+; X86-BMI1-NEXT:    movl %ebx, %eax
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    movl $0, %edx
+; X86-BMI1-NEXT:    jne .LBB57_6
+; X86-BMI1-NEXT:  # %bb.5:
+; X86-BMI1-NEXT:    movl %edi, %esi
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:  .LBB57_6:
+; X86-BMI1-NEXT:    shrdl %cl, %ebx, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB57_8
+; X86-BMI1-NEXT:  # %bb.7:
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:  .LBB57_8:
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_d1_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT:    shrxl %ecx, %edx, %esi
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB57_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    xorl %esi, %esi
+; X86-BMI2-NEXT:  .LBB57_2:
+; X86-BMI2-NEXT:    movb $64, %cl
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    shldl %cl, %eax, %esi
+; X86-BMI2-NEXT:    shlxl %ecx, %eax, %edi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB57_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %edi, %esi
+; X86-BMI2-NEXT:    movl $0, %edi
+; X86-BMI2-NEXT:  .LBB57_4:
+; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
+; X86-BMI2-NEXT:    jne .LBB57_6
+; X86-BMI2-NEXT:  # %bb.5:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:  .LBB57_6:
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    jne .LBB57_8
+; X86-BMI2-NEXT:  # %bb.7:
+; X86-BMI2-NEXT:    movl %edi, %eax
+; X86-BMI2-NEXT:  .LBB57_8:
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_d1_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -7258,21 +7258,21 @@ define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X64-NOBMI-NEXT:    shrq %cl, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_d1_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_d1_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    # kill: def $edx killed $edx def $rdx
-; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_d1_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, %rdi, %rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_d1_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhiq %rdx, %rax, %rax
+; X64-BMI2-NEXT:    retq
   %skip = zext i8 %numskipbits to i64
   %shifted = lshr i64 %val, %skip
   %numhighbits = sub i8 64, %numlowbits
@@ -7332,96 +7332,96 @@ define i64 @bextr64_d2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    popl %ebx
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_d2_load:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl (%eax), %edi
-; X86-BMI1NOTBM-NEXT:    movl 4(%eax), %edx
-; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %edi
-; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB58_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB58_2:
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    shldl %cl, %edi, %eax
-; X86-BMI1NOTBM-NEXT:    shll %cl, %edi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    movl %edi, %ebx
-; X86-BMI1NOTBM-NEXT:    jne .LBB58_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB58_4:
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB58_6
-; X86-BMI1NOTBM-NEXT:  # %bb.5:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB58_6:
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebx, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB58_8
-; X86-BMI1NOTBM-NEXT:  # %bb.7:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB58_8:
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_d2_load:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    movl (%edx), %eax
-; X86-BMI1BMI2-NEXT:    movl 4(%edx), %edx
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %esi
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB58_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %esi, %eax
-; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB58_2:
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %esi
-; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB58_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB58_4:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB58_6
-; X86-BMI1BMI2-NEXT:  # %bb.5:
-; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB58_6:
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB58_8
-; X86-BMI1BMI2-NEXT:  # %bb.7:
-; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB58_8:
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_d2_load:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl (%eax), %edi
+; X86-BMI1-NEXT:    movl 4(%eax), %edx
+; X86-BMI1-NEXT:    movl %edx, %eax
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    shrdl %cl, %edx, %edi
+; X86-BMI1-NEXT:    xorl %esi, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB58_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %edi
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:  .LBB58_2:
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    shldl %cl, %edi, %eax
+; X86-BMI1-NEXT:    shll %cl, %edi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    movl %edi, %ebx
+; X86-BMI1-NEXT:    jne .LBB58_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %eax, %ebx
+; X86-BMI1-NEXT:  .LBB58_4:
+; X86-BMI1-NEXT:    movl %ebx, %eax
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    movl $0, %edx
+; X86-BMI1-NEXT:    jne .LBB58_6
+; X86-BMI1-NEXT:  # %bb.5:
+; X86-BMI1-NEXT:    movl %edi, %esi
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:  .LBB58_6:
+; X86-BMI1-NEXT:    shrdl %cl, %ebx, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB58_8
+; X86-BMI1-NEXT:  # %bb.7:
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:  .LBB58_8:
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_d2_load:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movl (%edx), %eax
+; X86-BMI2-NEXT:    movl 4(%edx), %edx
+; X86-BMI2-NEXT:    shrxl %ecx, %edx, %esi
+; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB58_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    xorl %esi, %esi
+; X86-BMI2-NEXT:  .LBB58_2:
+; X86-BMI2-NEXT:    movb $64, %cl
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    shldl %cl, %eax, %esi
+; X86-BMI2-NEXT:    shlxl %ecx, %eax, %edi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB58_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %edi, %esi
+; X86-BMI2-NEXT:    movl $0, %edi
+; X86-BMI2-NEXT:  .LBB58_4:
+; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
+; X86-BMI2-NEXT:    jne .LBB58_6
+; X86-BMI2-NEXT:  # %bb.5:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:  .LBB58_6:
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    jne .LBB58_8
+; X86-BMI2-NEXT:  # %bb.7:
+; X86-BMI2-NEXT:    movl %edi, %eax
+; X86-BMI2-NEXT:  .LBB58_8:
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_d2_load:
 ; X64-NOBMI:       # %bb.0:
@@ -7435,19 +7435,19 @@ define i64 @bextr64_d2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X64-NOBMI-NEXT:    shrq %cl, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_d2_load:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, (%rdi), %rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_d2_load:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, (%rdi), %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_d2_load:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, (%rdi), %rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_d2_load:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, (%rdi), %rax
+; X64-BMI2-NEXT:    bzhiq %rdx, %rax, %rax
+; X64-BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %shifted = lshr i64 %val, %numskipbits
   %numhighbits = sub i64 64, %numlowbits
@@ -7506,96 +7506,96 @@ define i64 @bextr64_d3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
 ; X86-NOBMI-NEXT:    popl %ebx
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_d3_load_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl (%eax), %edi
-; X86-BMI1NOTBM-NEXT:    movl 4(%eax), %edx
-; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %edi
-; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB59_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB59_2:
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    shldl %cl, %edi, %eax
-; X86-BMI1NOTBM-NEXT:    shll %cl, %edi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    movl %edi, %ebx
-; X86-BMI1NOTBM-NEXT:    jne .LBB59_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB59_4:
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB59_6
-; X86-BMI1NOTBM-NEXT:  # %bb.5:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB59_6:
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebx, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB59_8
-; X86-BMI1NOTBM-NEXT:  # %bb.7:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB59_8:
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_d3_load_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    movl (%edx), %eax
-; X86-BMI1BMI2-NEXT:    movl 4(%edx), %edx
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %esi
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB59_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %esi, %eax
-; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB59_2:
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %esi
-; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB59_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB59_4:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB59_6
-; X86-BMI1BMI2-NEXT:  # %bb.5:
-; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB59_6:
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB59_8
-; X86-BMI1BMI2-NEXT:  # %bb.7:
-; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB59_8:
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_d3_load_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl (%eax), %edi
+; X86-BMI1-NEXT:    movl 4(%eax), %edx
+; X86-BMI1-NEXT:    movl %edx, %eax
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    shrdl %cl, %edx, %edi
+; X86-BMI1-NEXT:    xorl %esi, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB59_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %edi
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:  .LBB59_2:
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    shldl %cl, %edi, %eax
+; X86-BMI1-NEXT:    shll %cl, %edi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    movl %edi, %ebx
+; X86-BMI1-NEXT:    jne .LBB59_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %eax, %ebx
+; X86-BMI1-NEXT:  .LBB59_4:
+; X86-BMI1-NEXT:    movl %ebx, %eax
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    movl $0, %edx
+; X86-BMI1-NEXT:    jne .LBB59_6
+; X86-BMI1-NEXT:  # %bb.5:
+; X86-BMI1-NEXT:    movl %edi, %esi
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:  .LBB59_6:
+; X86-BMI1-NEXT:    shrdl %cl, %ebx, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB59_8
+; X86-BMI1-NEXT:  # %bb.7:
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:  .LBB59_8:
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_d3_load_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movl (%edx), %eax
+; X86-BMI2-NEXT:    movl 4(%edx), %edx
+; X86-BMI2-NEXT:    shrxl %ecx, %edx, %esi
+; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB59_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    xorl %esi, %esi
+; X86-BMI2-NEXT:  .LBB59_2:
+; X86-BMI2-NEXT:    movb $64, %cl
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    shldl %cl, %eax, %esi
+; X86-BMI2-NEXT:    shlxl %ecx, %eax, %edi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB59_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %edi, %esi
+; X86-BMI2-NEXT:    movl $0, %edi
+; X86-BMI2-NEXT:  .LBB59_4:
+; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
+; X86-BMI2-NEXT:    jne .LBB59_6
+; X86-BMI2-NEXT:  # %bb.5:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:  .LBB59_6:
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    jne .LBB59_8
+; X86-BMI2-NEXT:  # %bb.7:
+; X86-BMI2-NEXT:    movl %edi, %eax
+; X86-BMI2-NEXT:  .LBB59_8:
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_d3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -7609,21 +7609,21 @@ define i64 @bextr64_d3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits)
 ; X64-NOBMI-NEXT:    shrq %cl, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_d3_load_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, (%rdi), %rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_d3_load_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    # kill: def $edx killed $edx def $rdx
-; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, (%rdi), %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_d3_load_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, (%rdi), %rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_d3_load_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI2-NEXT:    shrxq %rsi, (%rdi), %rax
+; X64-BMI2-NEXT:    bzhiq %rdx, %rax, %rax
+; X64-BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %skip = zext i8 %numskipbits to i64
   %shifted = lshr i64 %val, %skip
@@ -7696,118 +7696,118 @@ define i64 @bextr64_d5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X86-NOBMI-NEXT:    popl %ebp
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_d5_skipextrauses:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebp
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    subl $12, %esp
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl %edx, %esi
-; X86-BMI1NOTBM-NEXT:    movl %eax, %ecx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %ebx
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:    testb $32, %al
-; X86-BMI1NOTBM-NEXT:    je .LBB60_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
-; X86-BMI1NOTBM-NEXT:    xorl %esi, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB60_2:
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    shldl %cl, %ebx, %esi
-; X86-BMI1NOTBM-NEXT:    shll %cl, %ebx
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %ebp
-; X86-BMI1NOTBM-NEXT:    jne .LBB60_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %ebp
-; X86-BMI1NOTBM-NEXT:  .LBB60_4:
-; X86-BMI1NOTBM-NEXT:    movl %ebp, %esi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    movl $0, %edi
-; X86-BMI1NOTBM-NEXT:    jne .LBB60_6
-; X86-BMI1NOTBM-NEXT:  # %bb.5:
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %edx
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB60_6:
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebp, %edx
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    jne .LBB60_8
-; X86-BMI1NOTBM-NEXT:  # %bb.7:
-; X86-BMI1NOTBM-NEXT:    movl %edx, %esi
-; X86-BMI1NOTBM-NEXT:  .LBB60_8:
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    pushl %ecx
-; X86-BMI1NOTBM-NEXT:    pushl %eax
-; X86-BMI1NOTBM-NEXT:    calll use64@PLT
-; X86-BMI1NOTBM-NEXT:    addl $16, %esp
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    addl $12, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    popl %ebp
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_d5_skipextrauses:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movl %eax, %ecx
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %edi
-; X86-BMI1BMI2-NEXT:    shrxl %eax, %edx, %edx
-; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    je .LBB60_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edx, %edi
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:  .LBB60_2:
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shldl %cl, %edi, %edx
-; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edi, %ebx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB60_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %ebx, %edx
-; X86-BMI1BMI2-NEXT:    movl $0, %ebx
-; X86-BMI1BMI2-NEXT:  .LBB60_4:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %edi
-; X86-BMI1BMI2-NEXT:    jne .LBB60_6
-; X86-BMI1BMI2-NEXT:  # %bb.5:
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB60_6:
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %ebx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    jne .LBB60_8
-; X86-BMI1BMI2-NEXT:  # %bb.7:
-; X86-BMI1BMI2-NEXT:    movl %ebx, %edi
-; X86-BMI1BMI2-NEXT:  .LBB60_8:
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    pushl %ecx
-; X86-BMI1BMI2-NEXT:    pushl %eax
-; X86-BMI1BMI2-NEXT:    calll use64@PLT
-; X86-BMI1BMI2-NEXT:    addl $16, %esp
-; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:    movl %esi, %edx
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_d5_skipextrauses:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebp
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    subl $12, %esp
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl %edx, %esi
+; X86-BMI1-NEXT:    movl %eax, %ecx
+; X86-BMI1-NEXT:    shrl %cl, %esi
+; X86-BMI1-NEXT:    shrdl %cl, %edx, %ebx
+; X86-BMI1-NEXT:    xorl %edx, %edx
+; X86-BMI1-NEXT:    testb $32, %al
+; X86-BMI1-NEXT:    je .LBB60_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %esi, %ebx
+; X86-BMI1-NEXT:    xorl %esi, %esi
+; X86-BMI1-NEXT:  .LBB60_2:
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    shldl %cl, %ebx, %esi
+; X86-BMI1-NEXT:    shll %cl, %ebx
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    movl %ebx, %ebp
+; X86-BMI1-NEXT:    jne .LBB60_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %esi, %ebp
+; X86-BMI1-NEXT:  .LBB60_4:
+; X86-BMI1-NEXT:    movl %ebp, %esi
+; X86-BMI1-NEXT:    shrl %cl, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    movl $0, %edi
+; X86-BMI1-NEXT:    jne .LBB60_6
+; X86-BMI1-NEXT:  # %bb.5:
+; X86-BMI1-NEXT:    movl %ebx, %edx
+; X86-BMI1-NEXT:    movl %esi, %edi
+; X86-BMI1-NEXT:  .LBB60_6:
+; X86-BMI1-NEXT:    shrdl %cl, %ebp, %edx
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT:    jne .LBB60_8
+; X86-BMI1-NEXT:  # %bb.7:
+; X86-BMI1-NEXT:    movl %edx, %esi
+; X86-BMI1-NEXT:  .LBB60_8:
+; X86-BMI1-NEXT:    subl $8, %esp
+; X86-BMI1-NEXT:    pushl %ecx
+; X86-BMI1-NEXT:    pushl %eax
+; X86-BMI1-NEXT:    calll use64@PLT
+; X86-BMI1-NEXT:    addl $16, %esp
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    addl $12, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    popl %ebp
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_d5_skipextrauses:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl %eax, %ecx
+; X86-BMI2-NEXT:    shrdl %cl, %edx, %edi
+; X86-BMI2-NEXT:    shrxl %eax, %edx, %edx
+; X86-BMI2-NEXT:    xorl %esi, %esi
+; X86-BMI2-NEXT:    testb $32, %al
+; X86-BMI2-NEXT:    je .LBB60_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edx, %edi
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:  .LBB60_2:
+; X86-BMI2-NEXT:    movb $64, %cl
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    shldl %cl, %edi, %edx
+; X86-BMI2-NEXT:    shlxl %ecx, %edi, %ebx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB60_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %ebx, %edx
+; X86-BMI2-NEXT:    movl $0, %ebx
+; X86-BMI2-NEXT:  .LBB60_4:
+; X86-BMI2-NEXT:    shrxl %ecx, %edx, %edi
+; X86-BMI2-NEXT:    jne .LBB60_6
+; X86-BMI2-NEXT:  # %bb.5:
+; X86-BMI2-NEXT:    movl %edi, %esi
+; X86-BMI2-NEXT:  .LBB60_6:
+; X86-BMI2-NEXT:    shrdl %cl, %edx, %ebx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    jne .LBB60_8
+; X86-BMI2-NEXT:  # %bb.7:
+; X86-BMI2-NEXT:    movl %ebx, %edi
+; X86-BMI2-NEXT:  .LBB60_8:
+; X86-BMI2-NEXT:    subl $8, %esp
+; X86-BMI2-NEXT:    pushl %ecx
+; X86-BMI2-NEXT:    pushl %eax
+; X86-BMI2-NEXT:    calll use64@PLT
+; X86-BMI2-NEXT:    addl $16, %esp
+; X86-BMI2-NEXT:    movl %edi, %eax
+; X86-BMI2-NEXT:    movl %esi, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_d5_skipextrauses:
 ; X64-NOBMI:       # %bb.0:
@@ -7825,29 +7825,29 @@ define i64 @bextr64_d5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X64-NOBMI-NEXT:    popq %rbx
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_d5_skipextrauses:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rsi, %rdi
-; X64-BMI1NOTBM-NEXT:    callq use64@PLT
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_d5_skipextrauses:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rbx
-; X64-BMI1BMI2-NEXT:    movq %rsi, %rdi
-; X64-BMI1BMI2-NEXT:    callq use64@PLT
-; X64-BMI1BMI2-NEXT:    movq %rbx, %rax
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_d5_skipextrauses:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, %rdi, %rbx
+; X64-BMI1-NEXT:    movq %rsi, %rdi
+; X64-BMI1-NEXT:    callq use64@PLT
+; X64-BMI1-NEXT:    movq %rbx, %rax
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_d5_skipextrauses:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhiq %rdx, %rax, %rbx
+; X64-BMI2-NEXT:    movq %rsi, %rdi
+; X64-BMI2-NEXT:    callq use64@PLT
+; X64-BMI2-NEXT:    movq %rbx, %rax
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %numhighbits = sub i64 64, %numlowbits
   %highbitscleared = shl i64 %shifted, %numhighbits
@@ -7895,71 +7895,71 @@ define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_32_d0:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %edx
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB61_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB61_2:
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
-; X86-BMI1NOTBM-NEXT:    shll %cl, %edx
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB61_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB61_4:
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB61_6
-; X86-BMI1NOTBM-NEXT:  # %bb.5:
-; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB61_6:
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_32_d0:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB61_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edx, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:  .LBB61_2:
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
-; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB61_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB61_4:
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB61_6
-; X86-BMI1BMI2-NEXT:  # %bb.5:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %eax
-; X86-BMI1BMI2-NEXT:  .LBB61_6:
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_32_d0:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    shrdl %cl, %esi, %edx
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB61_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:  .LBB61_2:
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    shldl %cl, %edx, %eax
+; X86-BMI1-NEXT:    shll %cl, %edx
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB61_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %edx, %eax
+; X86-BMI1-NEXT:    xorl %edx, %edx
+; X86-BMI1-NEXT:  .LBB61_4:
+; X86-BMI1-NEXT:    shrdl %cl, %eax, %edx
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB61_6
+; X86-BMI1-NEXT:  # %bb.5:
+; X86-BMI1-NEXT:    movl %edx, %eax
+; X86-BMI1-NEXT:  .LBB61_6:
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_32_d0:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT:    shrxl %ecx, %edx, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB61_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edx, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:  .LBB61_2:
+; X86-BMI2-NEXT:    movb $64, %cl
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB61_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB61_4:
+; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB61_6
+; X86-BMI2-NEXT:  # %bb.5:
+; X86-BMI2-NEXT:    shrxl %ecx, %edx, %eax
+; X86-BMI2-NEXT:  .LBB61_6:
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_32_d0:
 ; X64-NOBMI:       # %bb.0:
@@ -7974,21 +7974,21 @@ define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_32_d0:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_32_d0:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rax
-; X64-BMI1BMI2-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_32_d0:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, %rdi, %rax
+; X64-BMI1-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_32_d0:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhiq %rdx, %rax, %rax
+; X64-BMI2-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %numhighbits = sub i64 64, %numlowbits
   %highbitscleared = shl i64 %shifted, %numhighbits
@@ -8021,44 +8021,44 @@ define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bextr64_32_d1:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %esi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB62_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB62_2:
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, %edx, %eax
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bextr64_32_d1:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB62_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edx
-; X86-BMI1BMI2-NEXT:  .LBB62_2:
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %edx, %eax
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bextr64_32_d1:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    shrl %cl, %edx
+; X86-BMI1-NEXT:    shrdl %cl, %edi, %esi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB62_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %esi, %edx
+; X86-BMI1-NEXT:  .LBB62_2:
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    bextrl %eax, %edx, %eax
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bextr64_32_d1:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB62_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    shrxl %ecx, %esi, %edx
+; X86-BMI2-NEXT:  .LBB62_2:
+; X86-BMI2-NEXT:    bzhil %eax, %edx, %eax
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_32_d1:
 ; X64-NOBMI:       # %bb.0:
@@ -8073,20 +8073,20 @@ define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind
 ; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bextr64_32_d1:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    movzbl %sil, %eax
-; X64-BMI1NOTBM-NEXT:    orl %edx, %eax
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bextr64_32_d1:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bextr64_32_d1:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %edx
+; X64-BMI1-NEXT:    movzbl %sil, %eax
+; X64-BMI1-NEXT:    orl %edx, %eax
+; X64-BMI1-NEXT:    bextrq %rax, %rdi, %rax
+; X64-BMI1-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bextr64_32_d1:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %truncshifted = trunc i64 %shifted to i32
   %numhighbits = sub i32 32, %numlowbits
@@ -8111,31 +8111,22 @@ define void @pr38938(i32* %a0, i64* %a1) nounwind {
 ; X86-NOBMI-NEXT:    incl (%eax,%ecx)
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: pr38938:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    movl $2581, %edx # imm = 0xA15
-; X86-BMI1NOTBM-NEXT:    bextrl %edx, (%ecx), %ecx
-; X86-BMI1NOTBM-NEXT:    incl (%eax,%ecx,4)
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1TBM-LABEL: pr38938:
-; X86-BMI1TBM:       # %bb.0:
-; X86-BMI1TBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1TBM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1TBM-NEXT:    bextrl $2581, (%ecx), %ecx # imm = 0xA15
-; X86-BMI1TBM-NEXT:    incl (%eax,%ecx,4)
-; X86-BMI1TBM-NEXT:    retl
-;
-; X86-BMI1NOTBMBMI2-LABEL: pr38938:
-; X86-BMI1NOTBMBMI2:       # %bb.0:
-; X86-BMI1NOTBMBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBMBMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBMBMI2-NEXT:    movl $2581, %edx # imm = 0xA15
-; X86-BMI1NOTBMBMI2-NEXT:    bextrl %edx, (%ecx), %ecx
-; X86-BMI1NOTBMBMI2-NEXT:    incl (%eax,%ecx,4)
-; X86-BMI1NOTBMBMI2-NEXT:    retl
+; X86-BMINOTBM-LABEL: pr38938:
+; X86-BMINOTBM:       # %bb.0:
+; X86-BMINOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMINOTBM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMINOTBM-NEXT:    movl $2581, %edx # imm = 0xA15
+; X86-BMINOTBM-NEXT:    bextrl %edx, (%ecx), %ecx
+; X86-BMINOTBM-NEXT:    incl (%eax,%ecx,4)
+; X86-BMINOTBM-NEXT:    retl
+;
+; X86-BMITBM-LABEL: pr38938:
+; X86-BMITBM:       # %bb.0:
+; X86-BMITBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMITBM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMITBM-NEXT:    bextrl $2581, (%ecx), %ecx # imm = 0xA15
+; X86-BMITBM-NEXT:    incl (%eax,%ecx,4)
+; X86-BMITBM-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: pr38938:
 ; X64-NOBMI:       # %bb.0:
@@ -8145,25 +8136,18 @@ define void @pr38938(i32* %a0, i64* %a1) nounwind {
 ; X64-NOBMI-NEXT:    incl (%rdi,%rax)
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: pr38938:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    movl $2581, %eax # imm = 0xA15
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, (%rsi), %rax
-; X64-BMI1NOTBM-NEXT:    incl (%rdi,%rax,4)
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1TBM-LABEL: pr38938:
-; X64-BMI1TBM:       # %bb.0:
-; X64-BMI1TBM-NEXT:    bextrq $2581, (%rsi), %rax # imm = 0xA15
-; X64-BMI1TBM-NEXT:    incl (%rdi,%rax,4)
-; X64-BMI1TBM-NEXT:    retq
-;
-; X64-BMI1NOTBMBMI2-LABEL: pr38938:
-; X64-BMI1NOTBMBMI2:       # %bb.0:
-; X64-BMI1NOTBMBMI2-NEXT:    movl $2581, %eax # imm = 0xA15
-; X64-BMI1NOTBMBMI2-NEXT:    bextrq %rax, (%rsi), %rax
-; X64-BMI1NOTBMBMI2-NEXT:    incl (%rdi,%rax,4)
-; X64-BMI1NOTBMBMI2-NEXT:    retq
+; X64-BMINOTBM-LABEL: pr38938:
+; X64-BMINOTBM:       # %bb.0:
+; X64-BMINOTBM-NEXT:    movl $2581, %eax # imm = 0xA15
+; X64-BMINOTBM-NEXT:    bextrq %rax, (%rsi), %rax
+; X64-BMINOTBM-NEXT:    incl (%rdi,%rax,4)
+; X64-BMINOTBM-NEXT:    retq
+;
+; X64-BMITBM-LABEL: pr38938:
+; X64-BMITBM:       # %bb.0:
+; X64-BMITBM-NEXT:    bextrq $2581, (%rsi), %rax # imm = 0xA15
+; X64-BMITBM-NEXT:    incl (%rdi,%rax,4)
+; X64-BMITBM-NEXT:    retq
   %tmp = load i64, i64* %a1, align 8
   %tmp1 = lshr i64 %tmp, 21
   %tmp2 = and i64 %tmp1, 1023
@@ -8183,22 +8167,16 @@ define i32 @c0_i32(i32 %arg) nounwind {
 ; X86-NOBMI-NEXT:    andl $1023, %eax # imm = 0x3FF
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: c0_i32:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl $2579, %eax # imm = 0xA13
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
+; X86-BMINOTBM-LABEL: c0_i32:
+; X86-BMINOTBM:       # %bb.0:
+; X86-BMINOTBM-NEXT:    movl $2579, %eax # imm = 0xA13
+; X86-BMINOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMINOTBM-NEXT:    retl
 ;
-; X86-BMI1TBM-LABEL: c0_i32:
-; X86-BMI1TBM:       # %bb.0:
-; X86-BMI1TBM-NEXT:    bextrl $2579, {{[0-9]+}}(%esp), %eax # imm = 0xA13
-; X86-BMI1TBM-NEXT:    retl
-;
-; X86-BMI1NOTBMBMI2-LABEL: c0_i32:
-; X86-BMI1NOTBMBMI2:       # %bb.0:
-; X86-BMI1NOTBMBMI2-NEXT:    movl $2579, %eax # imm = 0xA13
-; X86-BMI1NOTBMBMI2-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBMBMI2-NEXT:    retl
+; X86-BMITBM-LABEL: c0_i32:
+; X86-BMITBM:       # %bb.0:
+; X86-BMITBM-NEXT:    bextrl $2579, {{[0-9]+}}(%esp), %eax # imm = 0xA13
+; X86-BMITBM-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: c0_i32:
 ; X64-NOBMI:       # %bb.0:
@@ -8207,22 +8185,16 @@ define i32 @c0_i32(i32 %arg) nounwind {
 ; X64-NOBMI-NEXT:    andl $1023, %eax # imm = 0x3FF
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: c0_i32:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    movl $2579, %eax # imm = 0xA13
-; X64-BMI1NOTBM-NEXT:    bextrl %eax, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1TBM-LABEL: c0_i32:
-; X64-BMI1TBM:       # %bb.0:
-; X64-BMI1TBM-NEXT:    bextrl $2579, %edi, %eax # imm = 0xA13
-; X64-BMI1TBM-NEXT:    retq
-;
-; X64-BMI1NOTBMBMI2-LABEL: c0_i32:
-; X64-BMI1NOTBMBMI2:       # %bb.0:
-; X64-BMI1NOTBMBMI2-NEXT:    movl $2579, %eax # imm = 0xA13
-; X64-BMI1NOTBMBMI2-NEXT:    bextrl %eax, %edi, %eax
-; X64-BMI1NOTBMBMI2-NEXT:    retq
+; X64-BMINOTBM-LABEL: c0_i32:
+; X64-BMINOTBM:       # %bb.0:
+; X64-BMINOTBM-NEXT:    movl $2579, %eax # imm = 0xA13
+; X64-BMINOTBM-NEXT:    bextrl %eax, %edi, %eax
+; X64-BMINOTBM-NEXT:    retq
+;
+; X64-BMITBM-LABEL: c0_i32:
+; X64-BMITBM:       # %bb.0:
+; X64-BMITBM-NEXT:    bextrl $2579, %edi, %eax # imm = 0xA13
+; X64-BMITBM-NEXT:    retq
   %tmp0 = lshr i32 %arg, 19
   %tmp1 = and i32 %tmp0, 1023
   ret i32 %tmp1
@@ -8301,25 +8273,18 @@ define i64 @c0_i64(i64 %arg) nounwind {
 ; X86-NOBMI-NEXT:    xorl %edx, %edx
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: c0_i64:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl $2579, %eax # imm = 0xA13
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1TBM-LABEL: c0_i64:
-; X86-BMI1TBM:       # %bb.0:
-; X86-BMI1TBM-NEXT:    bextrl $2579, {{[0-9]+}}(%esp), %eax # imm = 0xA13
-; X86-BMI1TBM-NEXT:    xorl %edx, %edx
-; X86-BMI1TBM-NEXT:    retl
-;
-; X86-BMI1NOTBMBMI2-LABEL: c0_i64:
-; X86-BMI1NOTBMBMI2:       # %bb.0:
-; X86-BMI1NOTBMBMI2-NEXT:    movl $2579, %eax # imm = 0xA13
-; X86-BMI1NOTBMBMI2-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBMBMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBMBMI2-NEXT:    retl
+; X86-BMINOTBM-LABEL: c0_i64:
+; X86-BMINOTBM:       # %bb.0:
+; X86-BMINOTBM-NEXT:    movl $2579, %eax # imm = 0xA13
+; X86-BMINOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMINOTBM-NEXT:    xorl %edx, %edx
+; X86-BMINOTBM-NEXT:    retl
+;
+; X86-BMITBM-LABEL: c0_i64:
+; X86-BMITBM:       # %bb.0:
+; X86-BMITBM-NEXT:    bextrl $2579, {{[0-9]+}}(%esp), %eax # imm = 0xA13
+; X86-BMITBM-NEXT:    xorl %edx, %edx
+; X86-BMITBM-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: c0_i64:
 ; X64-NOBMI:       # %bb.0:
@@ -8328,22 +8293,16 @@ define i64 @c0_i64(i64 %arg) nounwind {
 ; X64-NOBMI-NEXT:    andl $1023, %eax # imm = 0x3FF
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: c0_i64:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    movl $2611, %eax # imm = 0xA33
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1TBM-LABEL: c0_i64:
-; X64-BMI1TBM:       # %bb.0:
-; X64-BMI1TBM-NEXT:    bextrq $2611, %rdi, %rax # imm = 0xA33
-; X64-BMI1TBM-NEXT:    retq
-;
-; X64-BMI1NOTBMBMI2-LABEL: c0_i64:
-; X64-BMI1NOTBMBMI2:       # %bb.0:
-; X64-BMI1NOTBMBMI2-NEXT:    movl $2611, %eax # imm = 0xA33
-; X64-BMI1NOTBMBMI2-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBMBMI2-NEXT:    retq
+; X64-BMINOTBM-LABEL: c0_i64:
+; X64-BMINOTBM:       # %bb.0:
+; X64-BMINOTBM-NEXT:    movl $2611, %eax # imm = 0xA33
+; X64-BMINOTBM-NEXT:    bextrq %rax, %rdi, %rax
+; X64-BMINOTBM-NEXT:    retq
+;
+; X64-BMITBM-LABEL: c0_i64:
+; X64-BMITBM:       # %bb.0:
+; X64-BMITBM-NEXT:    bextrq $2611, %rdi, %rax # imm = 0xA33
+; X64-BMITBM-NEXT:    retq
   %tmp0 = lshr i64 %arg, 51
   %tmp1 = and i64 %tmp0, 1023
   ret i64 %tmp1
@@ -8430,28 +8389,20 @@ define void @c5_i32(i32 %arg, i32* %ptr) nounwind {
 ; X86-NOBMI-NEXT:    movl %ecx, (%eax)
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: c5_i32:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl $2579, %ecx # imm = 0xA13
-; X86-BMI1NOTBM-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    movl %ecx, (%eax)
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1TBM-LABEL: c5_i32:
-; X86-BMI1TBM:       # %bb.0:
-; X86-BMI1TBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1TBM-NEXT:    bextrl $2579, {{[0-9]+}}(%esp), %ecx # imm = 0xA13
-; X86-BMI1TBM-NEXT:    movl %ecx, (%eax)
-; X86-BMI1TBM-NEXT:    retl
-;
-; X86-BMI1NOTBMBMI2-LABEL: c5_i32:
-; X86-BMI1NOTBMBMI2:       # %bb.0:
-; X86-BMI1NOTBMBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBMBMI2-NEXT:    movl $2579, %ecx # imm = 0xA13
-; X86-BMI1NOTBMBMI2-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBMBMI2-NEXT:    movl %ecx, (%eax)
-; X86-BMI1NOTBMBMI2-NEXT:    retl
+; X86-BMINOTBM-LABEL: c5_i32:
+; X86-BMINOTBM:       # %bb.0:
+; X86-BMINOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMINOTBM-NEXT:    movl $2579, %ecx # imm = 0xA13
+; X86-BMINOTBM-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %ecx
+; X86-BMINOTBM-NEXT:    movl %ecx, (%eax)
+; X86-BMINOTBM-NEXT:    retl
+;
+; X86-BMITBM-LABEL: c5_i32:
+; X86-BMITBM:       # %bb.0:
+; X86-BMITBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMITBM-NEXT:    bextrl $2579, {{[0-9]+}}(%esp), %ecx # imm = 0xA13
+; X86-BMITBM-NEXT:    movl %ecx, (%eax)
+; X86-BMITBM-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: c5_i32:
 ; X64-NOBMI:       # %bb.0:
@@ -8460,25 +8411,18 @@ define void @c5_i32(i32 %arg, i32* %ptr) nounwind {
 ; X64-NOBMI-NEXT:    movl %edi, (%rsi)
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: c5_i32:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    movl $2579, %eax # imm = 0xA13
-; X64-BMI1NOTBM-NEXT:    bextrl %eax, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    movl %eax, (%rsi)
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1TBM-LABEL: c5_i32:
-; X64-BMI1TBM:       # %bb.0:
-; X64-BMI1TBM-NEXT:    bextrl $2579, %edi, %eax # imm = 0xA13
-; X64-BMI1TBM-NEXT:    movl %eax, (%rsi)
-; X64-BMI1TBM-NEXT:    retq
-;
-; X64-BMI1NOTBMBMI2-LABEL: c5_i32:
-; X64-BMI1NOTBMBMI2:       # %bb.0:
-; X64-BMI1NOTBMBMI2-NEXT:    movl $2579, %eax # imm = 0xA13
-; X64-BMI1NOTBMBMI2-NEXT:    bextrl %eax, %edi, %eax
-; X64-BMI1NOTBMBMI2-NEXT:    movl %eax, (%rsi)
-; X64-BMI1NOTBMBMI2-NEXT:    retq
+; X64-BMINOTBM-LABEL: c5_i32:
+; X64-BMINOTBM:       # %bb.0:
+; X64-BMINOTBM-NEXT:    movl $2579, %eax # imm = 0xA13
+; X64-BMINOTBM-NEXT:    bextrl %eax, %edi, %eax
+; X64-BMINOTBM-NEXT:    movl %eax, (%rsi)
+; X64-BMINOTBM-NEXT:    retq
+;
+; X64-BMITBM-LABEL: c5_i32:
+; X64-BMITBM:       # %bb.0:
+; X64-BMITBM-NEXT:    bextrl $2579, %edi, %eax # imm = 0xA13
+; X64-BMITBM-NEXT:    movl %eax, (%rsi)
+; X64-BMITBM-NEXT:    retq
   %tmp0 = lshr i32 %arg, 19
   %tmp1 = and i32 %tmp0, 1023
   store i32 %tmp1, i32* %ptr
@@ -8496,28 +8440,20 @@ define void @c6_i32(i32 %arg, i32* %ptr) nounwind {
 ; X86-NOBMI-NEXT:    movl %ecx, (%eax)
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: c6_i32:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl $3091, %ecx # imm = 0xC13
-; X86-BMI1NOTBM-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    movl %ecx, (%eax)
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1TBM-LABEL: c6_i32:
-; X86-BMI1TBM:       # %bb.0:
-; X86-BMI1TBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1TBM-NEXT:    bextrl $3091, {{[0-9]+}}(%esp), %ecx # imm = 0xC13
-; X86-BMI1TBM-NEXT:    movl %ecx, (%eax)
-; X86-BMI1TBM-NEXT:    retl
-;
-; X86-BMI1NOTBMBMI2-LABEL: c6_i32:
-; X86-BMI1NOTBMBMI2:       # %bb.0:
-; X86-BMI1NOTBMBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBMBMI2-NEXT:    movl $3091, %ecx # imm = 0xC13
-; X86-BMI1NOTBMBMI2-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBMBMI2-NEXT:    movl %ecx, (%eax)
-; X86-BMI1NOTBMBMI2-NEXT:    retl
+; X86-BMINOTBM-LABEL: c6_i32:
+; X86-BMINOTBM:       # %bb.0:
+; X86-BMINOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMINOTBM-NEXT:    movl $3091, %ecx # imm = 0xC13
+; X86-BMINOTBM-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %ecx
+; X86-BMINOTBM-NEXT:    movl %ecx, (%eax)
+; X86-BMINOTBM-NEXT:    retl
+;
+; X86-BMITBM-LABEL: c6_i32:
+; X86-BMITBM:       # %bb.0:
+; X86-BMITBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMITBM-NEXT:    bextrl $3091, {{[0-9]+}}(%esp), %ecx # imm = 0xC13
+; X86-BMITBM-NEXT:    movl %ecx, (%eax)
+; X86-BMITBM-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: c6_i32:
 ; X64-NOBMI:       # %bb.0:
@@ -8526,25 +8462,18 @@ define void @c6_i32(i32 %arg, i32* %ptr) nounwind {
 ; X64-NOBMI-NEXT:    movl %edi, (%rsi)
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: c6_i32:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    movl $3091, %eax # imm = 0xC13
-; X64-BMI1NOTBM-NEXT:    bextrl %eax, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    movl %eax, (%rsi)
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1TBM-LABEL: c6_i32:
-; X64-BMI1TBM:       # %bb.0:
-; X64-BMI1TBM-NEXT:    bextrl $3091, %edi, %eax # imm = 0xC13
-; X64-BMI1TBM-NEXT:    movl %eax, (%rsi)
-; X64-BMI1TBM-NEXT:    retq
-;
-; X64-BMI1NOTBMBMI2-LABEL: c6_i32:
-; X64-BMI1NOTBMBMI2:       # %bb.0:
-; X64-BMI1NOTBMBMI2-NEXT:    movl $3091, %eax # imm = 0xC13
-; X64-BMI1NOTBMBMI2-NEXT:    bextrl %eax, %edi, %eax
-; X64-BMI1NOTBMBMI2-NEXT:    movl %eax, (%rsi)
-; X64-BMI1NOTBMBMI2-NEXT:    retq
+; X64-BMINOTBM-LABEL: c6_i32:
+; X64-BMINOTBM:       # %bb.0:
+; X64-BMINOTBM-NEXT:    movl $3091, %eax # imm = 0xC13
+; X64-BMINOTBM-NEXT:    bextrl %eax, %edi, %eax
+; X64-BMINOTBM-NEXT:    movl %eax, (%rsi)
+; X64-BMINOTBM-NEXT:    retq
+;
+; X64-BMITBM-LABEL: c6_i32:
+; X64-BMITBM:       # %bb.0:
+; X64-BMITBM-NEXT:    bextrl $3091, %edi, %eax # imm = 0xC13
+; X64-BMITBM-NEXT:    movl %eax, (%rsi)
+; X64-BMITBM-NEXT:    retq
   %tmp0 = lshr i32 %arg, 19
   %tmp1 = and i32 %tmp0, 4095
   store i32 %tmp1, i32* %ptr
@@ -8589,31 +8518,22 @@ define void @c5_i64(i64 %arg, i64* %ptr) nounwind {
 ; X86-NOBMI-NEXT:    movl $0, 4(%eax)
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: c5_i64:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl $2579, %ecx # imm = 0xA13
-; X86-BMI1NOTBM-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    movl %ecx, (%eax)
-; X86-BMI1NOTBM-NEXT:    movl $0, 4(%eax)
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1TBM-LABEL: c5_i64:
-; X86-BMI1TBM:       # %bb.0:
-; X86-BMI1TBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1TBM-NEXT:    bextrl $2579, {{[0-9]+}}(%esp), %ecx # imm = 0xA13
-; X86-BMI1TBM-NEXT:    movl %ecx, (%eax)
-; X86-BMI1TBM-NEXT:    movl $0, 4(%eax)
-; X86-BMI1TBM-NEXT:    retl
-;
-; X86-BMI1NOTBMBMI2-LABEL: c5_i64:
-; X86-BMI1NOTBMBMI2:       # %bb.0:
-; X86-BMI1NOTBMBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBMBMI2-NEXT:    movl $2579, %ecx # imm = 0xA13
-; X86-BMI1NOTBMBMI2-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBMBMI2-NEXT:    movl %ecx, (%eax)
-; X86-BMI1NOTBMBMI2-NEXT:    movl $0, 4(%eax)
-; X86-BMI1NOTBMBMI2-NEXT:    retl
+; X86-BMINOTBM-LABEL: c5_i64:
+; X86-BMINOTBM:       # %bb.0:
+; X86-BMINOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMINOTBM-NEXT:    movl $2579, %ecx # imm = 0xA13
+; X86-BMINOTBM-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %ecx
+; X86-BMINOTBM-NEXT:    movl %ecx, (%eax)
+; X86-BMINOTBM-NEXT:    movl $0, 4(%eax)
+; X86-BMINOTBM-NEXT:    retl
+;
+; X86-BMITBM-LABEL: c5_i64:
+; X86-BMITBM:       # %bb.0:
+; X86-BMITBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMITBM-NEXT:    bextrl $2579, {{[0-9]+}}(%esp), %ecx # imm = 0xA13
+; X86-BMITBM-NEXT:    movl %ecx, (%eax)
+; X86-BMITBM-NEXT:    movl $0, 4(%eax)
+; X86-BMITBM-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: c5_i64:
 ; X64-NOBMI:       # %bb.0:
@@ -8622,25 +8542,18 @@ define void @c5_i64(i64 %arg, i64* %ptr) nounwind {
 ; X64-NOBMI-NEXT:    movq %rdi, (%rsi)
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: c5_i64:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    movl $2611, %eax # imm = 0xA33
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    movq %rax, (%rsi)
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1TBM-LABEL: c5_i64:
-; X64-BMI1TBM:       # %bb.0:
-; X64-BMI1TBM-NEXT:    bextrq $2611, %rdi, %rax # imm = 0xA33
-; X64-BMI1TBM-NEXT:    movq %rax, (%rsi)
-; X64-BMI1TBM-NEXT:    retq
-;
-; X64-BMI1NOTBMBMI2-LABEL: c5_i64:
-; X64-BMI1NOTBMBMI2:       # %bb.0:
-; X64-BMI1NOTBMBMI2-NEXT:    movl $2611, %eax # imm = 0xA33
-; X64-BMI1NOTBMBMI2-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBMBMI2-NEXT:    movq %rax, (%rsi)
-; X64-BMI1NOTBMBMI2-NEXT:    retq
+; X64-BMINOTBM-LABEL: c5_i64:
+; X64-BMINOTBM:       # %bb.0:
+; X64-BMINOTBM-NEXT:    movl $2611, %eax # imm = 0xA33
+; X64-BMINOTBM-NEXT:    bextrq %rax, %rdi, %rax
+; X64-BMINOTBM-NEXT:    movq %rax, (%rsi)
+; X64-BMINOTBM-NEXT:    retq
+;
+; X64-BMITBM-LABEL: c5_i64:
+; X64-BMITBM:       # %bb.0:
+; X64-BMITBM-NEXT:    bextrq $2611, %rdi, %rax # imm = 0xA33
+; X64-BMITBM-NEXT:    movq %rax, (%rsi)
+; X64-BMITBM-NEXT:    retq
   %tmp0 = lshr i64 %arg, 51
   %tmp1 = and i64 %tmp0, 1023
   store i64 %tmp1, i64* %ptr
@@ -8659,31 +8572,22 @@ define void @c6_i64(i64 %arg, i64* %ptr) nounwind {
 ; X86-NOBMI-NEXT:    movl $0, 4(%eax)
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: c6_i64:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl $3091, %ecx # imm = 0xC13
-; X86-BMI1NOTBM-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    movl %ecx, (%eax)
-; X86-BMI1NOTBM-NEXT:    movl $0, 4(%eax)
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1TBM-LABEL: c6_i64:
-; X86-BMI1TBM:       # %bb.0:
-; X86-BMI1TBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1TBM-NEXT:    bextrl $3091, {{[0-9]+}}(%esp), %ecx # imm = 0xC13
-; X86-BMI1TBM-NEXT:    movl %ecx, (%eax)
-; X86-BMI1TBM-NEXT:    movl $0, 4(%eax)
-; X86-BMI1TBM-NEXT:    retl
-;
-; X86-BMI1NOTBMBMI2-LABEL: c6_i64:
-; X86-BMI1NOTBMBMI2:       # %bb.0:
-; X86-BMI1NOTBMBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBMBMI2-NEXT:    movl $3091, %ecx # imm = 0xC13
-; X86-BMI1NOTBMBMI2-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBMBMI2-NEXT:    movl %ecx, (%eax)
-; X86-BMI1NOTBMBMI2-NEXT:    movl $0, 4(%eax)
-; X86-BMI1NOTBMBMI2-NEXT:    retl
+; X86-BMINOTBM-LABEL: c6_i64:
+; X86-BMINOTBM:       # %bb.0:
+; X86-BMINOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMINOTBM-NEXT:    movl $3091, %ecx # imm = 0xC13
+; X86-BMINOTBM-NEXT:    bextrl %ecx, {{[0-9]+}}(%esp), %ecx
+; X86-BMINOTBM-NEXT:    movl %ecx, (%eax)
+; X86-BMINOTBM-NEXT:    movl $0, 4(%eax)
+; X86-BMINOTBM-NEXT:    retl
+;
+; X86-BMITBM-LABEL: c6_i64:
+; X86-BMITBM:       # %bb.0:
+; X86-BMITBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMITBM-NEXT:    bextrl $3091, {{[0-9]+}}(%esp), %ecx # imm = 0xC13
+; X86-BMITBM-NEXT:    movl %ecx, (%eax)
+; X86-BMITBM-NEXT:    movl $0, 4(%eax)
+; X86-BMITBM-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: c6_i64:
 ; X64-NOBMI:       # %bb.0:
@@ -8692,25 +8596,18 @@ define void @c6_i64(i64 %arg, i64* %ptr) nounwind {
 ; X64-NOBMI-NEXT:    movq %rdi, (%rsi)
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: c6_i64:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    movl $3123, %eax # imm = 0xC33
-; X64-BMI1NOTBM-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    movq %rax, (%rsi)
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1TBM-LABEL: c6_i64:
-; X64-BMI1TBM:       # %bb.0:
-; X64-BMI1TBM-NEXT:    bextrq $3123, %rdi, %rax # imm = 0xC33
-; X64-BMI1TBM-NEXT:    movq %rax, (%rsi)
-; X64-BMI1TBM-NEXT:    retq
-;
-; X64-BMI1NOTBMBMI2-LABEL: c6_i64:
-; X64-BMI1NOTBMBMI2:       # %bb.0:
-; X64-BMI1NOTBMBMI2-NEXT:    movl $3123, %eax # imm = 0xC33
-; X64-BMI1NOTBMBMI2-NEXT:    bextrq %rax, %rdi, %rax
-; X64-BMI1NOTBMBMI2-NEXT:    movq %rax, (%rsi)
-; X64-BMI1NOTBMBMI2-NEXT:    retq
+; X64-BMINOTBM-LABEL: c6_i64:
+; X64-BMINOTBM:       # %bb.0:
+; X64-BMINOTBM-NEXT:    movl $3123, %eax # imm = 0xC33
+; X64-BMINOTBM-NEXT:    bextrq %rax, %rdi, %rax
+; X64-BMINOTBM-NEXT:    movq %rax, (%rsi)
+; X64-BMINOTBM-NEXT:    retq
+;
+; X64-BMITBM-LABEL: c6_i64:
+; X64-BMITBM:       # %bb.0:
+; X64-BMITBM-NEXT:    bextrq $3123, %rdi, %rax # imm = 0xC33
+; X64-BMITBM-NEXT:    movq %rax, (%rsi)
+; X64-BMITBM-NEXT:    retq
   %tmp0 = lshr i64 %arg, 51
   %tmp1 = and i64 %tmp0, 4095
   store i64 %tmp1, i64* %ptr

diff  --git a/llvm/test/CodeGen/X86/extract-lowbits.ll b/llvm/test/CodeGen/X86/extract-lowbits.ll
index 4c11d5514022..177f99e7660c 100644
--- a/llvm/test/CodeGen/X86/extract-lowbits.ll
+++ b/llvm/test/CodeGen/X86/extract-lowbits.ll
@@ -1,14 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI,X86-NOBMI
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1NOTBM,X86-BMI1NOTBM
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1TBM,X86-BMI1TBM
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1TBM,X86-BMI1TBM,BMI1TBMBMI2,X86-BMI1TBMBMI2
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1NOTBMBMI2,X86-BMI1NOTBMBMI2
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI,X64-NOBMI
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1NOTBM,X64-BMI1NOTBM
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1TBM,X64-BMI1TBM
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1TBM,X64-BMI1TBM,BMI1TBMBMI2,X64-BMI1TBMBMI2
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1NOTBMBMI2,X64-BMI1NOTBMBMI2
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI1
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI1
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI1,X64-BMI1NOTBM
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI1,X64-BMI1TBM
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2,X64-BMI2TBM
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2,X64-BMI2NOTBM
 
 ; *Please* keep in sync with test/CodeGen/AArch64/extract-lowbits.ll
 
@@ -37,18 +37,18 @@ define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi32_a0:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
+; X86-BMI1-LABEL: bzhi32_a0:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI1BMI2-LABEL: bzhi32_a0:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI2-LABEL: bzhi32_a0:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_a0:
 ; X64-NOBMI:       # %bb.0:
@@ -60,16 +60,16 @@ define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi32_a0:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi32_a0:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi32_a0:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi32_a0:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
   %onebit = shl i32 1, %numlowbits
   %mask = add nsw i32 %onebit, -1
   %masked = and i32 %mask, %val
@@ -86,18 +86,18 @@ define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi32_a1_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
+; X86-BMI1-LABEL: bzhi32_a1_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI1BMI2-LABEL: bzhi32_a1_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI2-LABEL: bzhi32_a1_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_a1_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -109,16 +109,16 @@ define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi32_a1_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi32_a1_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi32_a1_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi32_a1_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
   %conv = zext i8 %numlowbits to i32
   %onebit = shl i32 1, %conv
   %mask = add nsw i32 %onebit, -1
@@ -137,20 +137,20 @@ define i32 @bzhi32_a2_load(i32* %w, i32 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    andl (%edx), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi32_a2_load:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    shll $8, %ecx
-; X86-BMI1NOTBM-NEXT:    bextrl %ecx, (%eax), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi32_a2_load:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi32_a2_load:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    shll $8, %ecx
+; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi32_a2_load:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_a2_load:
 ; X64-NOBMI:       # %bb.0:
@@ -162,16 +162,16 @@ define i32 @bzhi32_a2_load(i32* %w, i32 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andl (%rdi), %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi32_a2_load:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, (%rdi), %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi32_a2_load:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, (%rdi), %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi32_a2_load:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi32_a2_load:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
+; X64-BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %onebit = shl i32 1, %numlowbits
   %mask = add nsw i32 %onebit, -1
@@ -190,20 +190,20 @@ define i32 @bzhi32_a3_load_indexzext(i32* %w, i8 zeroext %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    andl (%edx), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi32_a3_load_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    shll $8, %ecx
-; X86-BMI1NOTBM-NEXT:    bextrl %ecx, (%eax), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi32_a3_load_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi32_a3_load_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    shll $8, %ecx
+; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi32_a3_load_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_a3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -215,16 +215,16 @@ define i32 @bzhi32_a3_load_indexzext(i32* %w, i8 zeroext %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andl (%rdi), %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi32_a3_load_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, (%rdi), %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi32_a3_load_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, (%rdi), %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi32_a3_load_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi32_a3_load_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
+; X64-BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %conv = zext i8 %numlowbits to i32
   %onebit = shl i32 1, %conv
@@ -243,18 +243,18 @@ define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi32_a4_commutative:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
+; X86-BMI1-LABEL: bzhi32_a4_commutative:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI1BMI2-LABEL: bzhi32_a4_commutative:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI2-LABEL: bzhi32_a4_commutative:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_a4_commutative:
 ; X64-NOBMI:       # %bb.0:
@@ -266,16 +266,16 @@ define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi32_a4_commutative:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi32_a4_commutative:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi32_a4_commutative:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi32_a4_commutative:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
   %onebit = shl i32 1, %numlowbits
   %mask = add nsw i32 %onebit, -1
   %masked = and i32 %val, %mask ; swapped order
@@ -304,43 +304,43 @@ define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_a0:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $1, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:    shldl %cl, %eax, %edx
-; X86-BMI1NOTBM-NEXT:    shll %cl, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB5_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB5_2:
-; X86-BMI1NOTBM-NEXT:    addl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    adcl $-1, %edx
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_a0:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl $1, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
-; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB5_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB5_2:
-; X86-BMI1BMI2-NEXT:    addl $-1, %eax
-; X86-BMI1BMI2-NEXT:    adcl $-1, %edx
-; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_a0:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $1, %eax
+; X86-BMI1-NEXT:    xorl %edx, %edx
+; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB5_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:  .LBB5_2:
+; X86-BMI1-NEXT:    addl $-1, %eax
+; X86-BMI1-NEXT:    adcl $-1, %edx
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_a0:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl $1, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB5_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB5_2:
+; X86-BMI2-NEXT:    addl $-1, %eax
+; X86-BMI2-NEXT:    adcl $-1, %edx
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_a0:
 ; X64-NOBMI:       # %bb.0:
@@ -352,16 +352,16 @@ define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andq %rdi, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_a0:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrq %rsi, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_a0:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi64_a0:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi64_a0:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    retq
   %onebit = shl i64 1, %numlowbits
   %mask = add nsw i64 %onebit, -1
   %masked = and i64 %mask, %val
@@ -388,43 +388,43 @@ define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_a1_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $1, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:    shldl %cl, %eax, %edx
-; X86-BMI1NOTBM-NEXT:    shll %cl, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB6_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB6_2:
-; X86-BMI1NOTBM-NEXT:    addl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    adcl $-1, %edx
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_a1_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl $1, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
-; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB6_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB6_2:
-; X86-BMI1BMI2-NEXT:    addl $-1, %eax
-; X86-BMI1BMI2-NEXT:    adcl $-1, %edx
-; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_a1_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $1, %eax
+; X86-BMI1-NEXT:    xorl %edx, %edx
+; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB6_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:  .LBB6_2:
+; X86-BMI1-NEXT:    addl $-1, %eax
+; X86-BMI1-NEXT:    adcl $-1, %edx
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_a1_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl $1, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB6_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB6_2:
+; X86-BMI2-NEXT:    addl $-1, %eax
+; X86-BMI2-NEXT:    adcl $-1, %edx
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_a1_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -436,18 +436,18 @@ define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andq %rdi, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_a1_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrq %rsi, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bzhi64_a1_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_a1_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bzhi64_a1_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    retq
   %conv = zext i8 %numlowbits to i64
   %onebit = shl i64 1, %conv
   %mask = add nsw i64 %onebit, -1
@@ -478,49 +478,49 @@ define i64 @bzhi64_a2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_a2_load:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $1, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:    shldl %cl, %eax, %edx
-; X86-BMI1NOTBM-NEXT:    shll %cl, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB7_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB7_2:
-; X86-BMI1NOTBM-NEXT:    addl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    adcl $-1, %edx
-; X86-BMI1NOTBM-NEXT:    andl 4(%esi), %edx
-; X86-BMI1NOTBM-NEXT:    andl (%esi), %eax
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_a2_load:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl $1, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
-; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB7_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB7_2:
-; X86-BMI1BMI2-NEXT:    addl $-1, %eax
-; X86-BMI1BMI2-NEXT:    adcl $-1, %edx
-; X86-BMI1BMI2-NEXT:    andl 4(%esi), %edx
-; X86-BMI1BMI2-NEXT:    andl (%esi), %eax
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_a2_load:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $1, %eax
+; X86-BMI1-NEXT:    xorl %edx, %edx
+; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB7_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:  .LBB7_2:
+; X86-BMI1-NEXT:    addl $-1, %eax
+; X86-BMI1-NEXT:    adcl $-1, %edx
+; X86-BMI1-NEXT:    andl 4(%esi), %edx
+; X86-BMI1-NEXT:    andl (%esi), %eax
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_a2_load:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl $1, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB7_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB7_2:
+; X86-BMI2-NEXT:    addl $-1, %eax
+; X86-BMI2-NEXT:    adcl $-1, %edx
+; X86-BMI2-NEXT:    andl 4(%esi), %edx
+; X86-BMI2-NEXT:    andl (%esi), %eax
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_a2_load:
 ; X64-NOBMI:       # %bb.0:
@@ -532,16 +532,16 @@ define i64 @bzhi64_a2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andq (%rdi), %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_a2_load:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrq %rsi, (%rdi), %rax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_a2_load:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi64_a2_load:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi64_a2_load:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
+; X64-BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %onebit = shl i64 1, %numlowbits
   %mask = add nsw i64 %onebit, -1
@@ -572,49 +572,49 @@ define i64 @bzhi64_a3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_a3_load_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $1, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:    shldl %cl, %eax, %edx
-; X86-BMI1NOTBM-NEXT:    shll %cl, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB8_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB8_2:
-; X86-BMI1NOTBM-NEXT:    addl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    adcl $-1, %edx
-; X86-BMI1NOTBM-NEXT:    andl 4(%esi), %edx
-; X86-BMI1NOTBM-NEXT:    andl (%esi), %eax
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_a3_load_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl $1, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
-; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB8_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB8_2:
-; X86-BMI1BMI2-NEXT:    addl $-1, %eax
-; X86-BMI1BMI2-NEXT:    adcl $-1, %edx
-; X86-BMI1BMI2-NEXT:    andl 4(%esi), %edx
-; X86-BMI1BMI2-NEXT:    andl (%esi), %eax
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_a3_load_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $1, %eax
+; X86-BMI1-NEXT:    xorl %edx, %edx
+; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB8_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:  .LBB8_2:
+; X86-BMI1-NEXT:    addl $-1, %eax
+; X86-BMI1-NEXT:    adcl $-1, %edx
+; X86-BMI1-NEXT:    andl 4(%esi), %edx
+; X86-BMI1-NEXT:    andl (%esi), %eax
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_a3_load_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl $1, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB8_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB8_2:
+; X86-BMI2-NEXT:    addl $-1, %eax
+; X86-BMI2-NEXT:    adcl $-1, %edx
+; X86-BMI2-NEXT:    andl 4(%esi), %edx
+; X86-BMI2-NEXT:    andl (%esi), %eax
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_a3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -626,18 +626,18 @@ define i64 @bzhi64_a3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andq (%rdi), %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_a3_load_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrq %rsi, (%rdi), %rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bzhi64_a3_load_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_a3_load_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bzhi64_a3_load_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
+; X64-BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %conv = zext i8 %numlowbits to i64
   %onebit = shl i64 1, %conv
@@ -666,43 +666,43 @@ define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_a4_commutative:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $1, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:    shldl %cl, %eax, %edx
-; X86-BMI1NOTBM-NEXT:    shll %cl, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB9_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB9_2:
-; X86-BMI1NOTBM-NEXT:    addl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    adcl $-1, %edx
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_a4_commutative:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl $1, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
-; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB9_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB9_2:
-; X86-BMI1BMI2-NEXT:    addl $-1, %eax
-; X86-BMI1BMI2-NEXT:    adcl $-1, %edx
-; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_a4_commutative:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $1, %eax
+; X86-BMI1-NEXT:    xorl %edx, %edx
+; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB9_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:  .LBB9_2:
+; X86-BMI1-NEXT:    addl $-1, %eax
+; X86-BMI1-NEXT:    adcl $-1, %edx
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_a4_commutative:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl $1, %eax
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB9_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB9_2:
+; X86-BMI2-NEXT:    addl $-1, %eax
+; X86-BMI2-NEXT:    adcl $-1, %edx
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_a4_commutative:
 ; X64-NOBMI:       # %bb.0:
@@ -714,16 +714,16 @@ define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andq %rdi, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_a4_commutative:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrq %rsi, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_a4_commutative:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi64_a4_commutative:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi64_a4_commutative:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    retq
   %onebit = shl i64 1, %numlowbits
   %mask = add nsw i64 %onebit, -1
   %masked = and i64 %val, %mask ; swapped order
@@ -749,34 +749,34 @@ define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_32_a0:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $1, %edx
-; X86-BMI1NOTBM-NEXT:    shll %cl, %edx
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB10_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB10_2:
-; X86-BMI1NOTBM-NEXT:    decl %eax
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_32_a0:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB10_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl $1, %eax
-; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB10_2:
-; X86-BMI1BMI2-NEXT:    decl %eax
-; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_32_a0:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $1, %edx
+; X86-BMI1-NEXT:    shll %cl, %edx
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB10_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %edx, %eax
+; X86-BMI1-NEXT:  .LBB10_2:
+; X86-BMI1-NEXT:    decl %eax
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_32_a0:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    jne .LBB10_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl $1, %eax
+; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
+; X86-BMI2-NEXT:  .LBB10_2:
+; X86-BMI2-NEXT:    decl %eax
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_32_a0:
 ; X64-NOBMI:       # %bb.0:
@@ -789,16 +789,16 @@ define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_32_a0:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_32_a0:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi64_32_a0:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi64_32_a0:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
   %onebit = shl i64 1, %numlowbits
   %mask = add nsw i64 %onebit, -1
   %masked = and i64 %mask, %val
@@ -817,18 +817,18 @@ define i32 @bzhi64_32_a1(i64 %val, i32 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_32_a1:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_32_a1:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI1BMI2-LABEL: bzhi64_32_a1:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI2-LABEL: bzhi64_32_a1:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_32_a1:
 ; X64-NOBMI:       # %bb.0:
@@ -840,16 +840,16 @@ define i32 @bzhi64_32_a1(i64 %val, i32 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_32_a1:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_32_a1:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi64_32_a1:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi64_32_a1:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
   %truncval = trunc i64 %val to i32
   %onebit = shl i32 1, %numlowbits
   %mask = add nsw i32 %onebit, -1
@@ -879,36 +879,36 @@ define i32 @bzhi64_32_a1_trunc_extrause(i64 %val, i32 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %ebx
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_32_a1_trunc_extrause:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    pushl %eax
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl %esi, (%esp)
-; X86-BMI1NOTBM-NEXT:    calll use32@PLT
-; X86-BMI1NOTBM-NEXT:    shll $8, %ebx
-; X86-BMI1NOTBM-NEXT:    bextrl %ebx, %esi, %eax
-; X86-BMI1NOTBM-NEXT:    addl $4, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_32_a1_trunc_extrause:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    pushl %eax
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movl %esi, (%esp)
-; X86-BMI1BMI2-NEXT:    calll use32@PLT
-; X86-BMI1BMI2-NEXT:    bzhil %ebx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    addl $4, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_32_a1_trunc_extrause:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    pushl %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl %esi, (%esp)
+; X86-BMI1-NEXT:    calll use32@PLT
+; X86-BMI1-NEXT:    shll $8, %ebx
+; X86-BMI1-NEXT:    bextrl %ebx, %esi, %eax
+; X86-BMI1-NEXT:    addl $4, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_32_a1_trunc_extrause:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    pushl %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl %esi, (%esp)
+; X86-BMI2-NEXT:    calll use32@PLT
+; X86-BMI2-NEXT:    bzhil %ebx, %esi, %eax
+; X86-BMI2-NEXT:    addl $4, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_32_a1_trunc_extrause:
 ; X64-NOBMI:       # %bb.0:
@@ -928,34 +928,34 @@ define i32 @bzhi64_32_a1_trunc_extrause(i64 %val, i32 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    popq %rbp
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_32_a1_trunc_extrause:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %r14
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    pushq %rax
-; X64-BMI1NOTBM-NEXT:    movl %esi, %ebx
-; X64-BMI1NOTBM-NEXT:    movq %rdi, %r14
-; X64-BMI1NOTBM-NEXT:    callq use32@PLT
-; X64-BMI1NOTBM-NEXT:    shll $8, %ebx
-; X64-BMI1NOTBM-NEXT:    bextrl %ebx, %r14d, %eax
-; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    popq %r14
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bzhi64_32_a1_trunc_extrause:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %rbp
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    pushq %rax
-; X64-BMI1BMI2-NEXT:    movl %esi, %ebp
-; X64-BMI1BMI2-NEXT:    movq %rdi, %rbx
-; X64-BMI1BMI2-NEXT:    callq use32@PLT
-; X64-BMI1BMI2-NEXT:    bzhil %ebp, %ebx, %eax
-; X64-BMI1BMI2-NEXT:    addq $8, %rsp
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    popq %rbp
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_32_a1_trunc_extrause:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %r14
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    pushq %rax
+; X64-BMI1-NEXT:    movl %esi, %ebx
+; X64-BMI1-NEXT:    movq %rdi, %r14
+; X64-BMI1-NEXT:    callq use32@PLT
+; X64-BMI1-NEXT:    shll $8, %ebx
+; X64-BMI1-NEXT:    bextrl %ebx, %r14d, %eax
+; X64-BMI1-NEXT:    addq $8, %rsp
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    popq %r14
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bzhi64_32_a1_trunc_extrause:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %rbp
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    pushq %rax
+; X64-BMI2-NEXT:    movl %esi, %ebp
+; X64-BMI2-NEXT:    movq %rdi, %rbx
+; X64-BMI2-NEXT:    callq use32@PLT
+; X64-BMI2-NEXT:    bzhil %ebp, %ebx, %eax
+; X64-BMI2-NEXT:    addq $8, %rsp
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    popq %rbp
+; X64-BMI2-NEXT:    retq
   %truncval = trunc i64 %val to i32
   call void @use32(i32 %truncval)
   %onebit = shl i32 1, %numlowbits
@@ -976,18 +976,18 @@ define i32 @bzhi64_32_a2(i64 %val, i32 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_32_a2:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_32_a2:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI1BMI2-LABEL: bzhi64_32_a2:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI2-LABEL: bzhi64_32_a2:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_32_a2:
 ; X64-NOBMI:       # %bb.0:
@@ -999,16 +999,16 @@ define i32 @bzhi64_32_a2(i64 %val, i32 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_32_a2:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_32_a2:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi64_32_a2:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi64_32_a2:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
   %onebit = shl i32 1, %numlowbits
   %mask = add nsw i32 %onebit, -1
   %zextmask = zext i32 %mask to i64
@@ -1035,34 +1035,34 @@ define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_32_a3:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $1, %edx
-; X86-BMI1NOTBM-NEXT:    shll %cl, %edx
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB14_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB14_2:
-; X86-BMI1NOTBM-NEXT:    decl %eax
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_32_a3:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB14_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl $1, %eax
-; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB14_2:
-; X86-BMI1BMI2-NEXT:    decl %eax
-; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_32_a3:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $1, %edx
+; X86-BMI1-NEXT:    shll %cl, %edx
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB14_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %edx, %eax
+; X86-BMI1-NEXT:  .LBB14_2:
+; X86-BMI1-NEXT:    decl %eax
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_32_a3:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    jne .LBB14_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl $1, %eax
+; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
+; X86-BMI2-NEXT:  .LBB14_2:
+; X86-BMI2-NEXT:    decl %eax
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_32_a3:
 ; X64-NOBMI:       # %bb.0:
@@ -1075,16 +1075,16 @@ define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_32_a3:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_32_a3:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi64_32_a3:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi64_32_a3:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
   %onebit = shl i64 1, %numlowbits
   %mask = add nsw i64 %onebit, 4294967295
   %masked = and i64 %mask, %val
@@ -1106,18 +1106,18 @@ define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi32_b0:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
+; X86-BMI1-LABEL: bzhi32_b0:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI1BMI2-LABEL: bzhi32_b0:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI2-LABEL: bzhi32_b0:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_b0:
 ; X64-NOBMI:       # %bb.0:
@@ -1129,16 +1129,16 @@ define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi32_b0:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi32_b0:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi32_b0:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi32_b0:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
   %notmask = shl i32 -1, %numlowbits
   %mask = xor i32 %notmask, -1
   %masked = and i32 %mask, %val
@@ -1155,18 +1155,18 @@ define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi32_b1_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
+; X86-BMI1-LABEL: bzhi32_b1_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI1BMI2-LABEL: bzhi32_b1_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI2-LABEL: bzhi32_b1_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_b1_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -1178,16 +1178,16 @@ define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi32_b1_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi32_b1_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi32_b1_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi32_b1_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
   %conv = zext i8 %numlowbits to i32
   %notmask = shl i32 -1, %conv
   %mask = xor i32 %notmask, -1
@@ -1206,20 +1206,20 @@ define i32 @bzhi32_b2_load(i32* %w, i32 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    andl (%edx), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi32_b2_load:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    shll $8, %ecx
-; X86-BMI1NOTBM-NEXT:    bextrl %ecx, (%eax), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi32_b2_load:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi32_b2_load:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    shll $8, %ecx
+; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi32_b2_load:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_b2_load:
 ; X64-NOBMI:       # %bb.0:
@@ -1231,16 +1231,16 @@ define i32 @bzhi32_b2_load(i32* %w, i32 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andl (%rdi), %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi32_b2_load:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, (%rdi), %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi32_b2_load:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, (%rdi), %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi32_b2_load:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi32_b2_load:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
+; X64-BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %notmask = shl i32 -1, %numlowbits
   %mask = xor i32 %notmask, -1
@@ -1259,20 +1259,20 @@ define i32 @bzhi32_b3_load_indexzext(i32* %w, i8 zeroext %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    andl (%edx), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi32_b3_load_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    shll $8, %ecx
-; X86-BMI1NOTBM-NEXT:    bextrl %ecx, (%eax), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi32_b3_load_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi32_b3_load_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    shll $8, %ecx
+; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi32_b3_load_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_b3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -1284,16 +1284,16 @@ define i32 @bzhi32_b3_load_indexzext(i32* %w, i8 zeroext %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andl (%rdi), %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi32_b3_load_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, (%rdi), %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi32_b3_load_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, (%rdi), %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi32_b3_load_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi32_b3_load_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
+; X64-BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %conv = zext i8 %numlowbits to i32
   %notmask = shl i32 -1, %conv
@@ -1312,18 +1312,18 @@ define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi32_b4_commutative:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
+; X86-BMI1-LABEL: bzhi32_b4_commutative:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI1BMI2-LABEL: bzhi32_b4_commutative:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI2-LABEL: bzhi32_b4_commutative:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_b4_commutative:
 ; X64-NOBMI:       # %bb.0:
@@ -1335,16 +1335,16 @@ define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi32_b4_commutative:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi32_b4_commutative:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi32_b4_commutative:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi32_b4_commutative:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
   %notmask = shl i32 -1, %numlowbits
   %mask = xor i32 %notmask, -1
   %masked = and i32 %val, %mask ; swapped order
@@ -1377,36 +1377,36 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_b0:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %edx
-; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    shll %cl, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB20_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB20_2:
-; X86-BMI1NOTBM-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
-; X86-BMI1NOTBM-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %edx
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_b0:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
-; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI1BMI2-NEXT:    shlxl %edx, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %dl
-; X86-BMI1BMI2-NEXT:    je .LBB20_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %eax, %ecx
-; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB20_2:
-; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
-; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_b0:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %edx
+; X86-BMI1-NEXT:    movl $-1, %eax
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB20_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:  .LBB20_2:
+; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %edx
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_b0:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI2-NEXT:    shlxl %edx, %ecx, %eax
+; X86-BMI2-NEXT:    testb $32, %dl
+; X86-BMI2-NEXT:    je .LBB20_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %eax, %ecx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB20_2:
+; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_b0:
 ; X64-NOBMI:       # %bb.0:
@@ -1418,16 +1418,16 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andq %rdi, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_b0:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrq %rsi, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_b0:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi64_b0:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi64_b0:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    retq
   %notmask = shl i64 -1, %numlowbits
   %mask = xor i64 %notmask, -1
   %masked = and i64 %mask, %val
@@ -1458,36 +1458,36 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_b1_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %edx
-; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    shll %cl, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB21_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB21_2:
-; X86-BMI1NOTBM-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
-; X86-BMI1NOTBM-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %edx
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_b1_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
-; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI1BMI2-NEXT:    shlxl %edx, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %dl
-; X86-BMI1BMI2-NEXT:    je .LBB21_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %eax, %ecx
-; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB21_2:
-; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
-; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_b1_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %edx
+; X86-BMI1-NEXT:    movl $-1, %eax
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB21_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:  .LBB21_2:
+; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %edx
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_b1_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI2-NEXT:    shlxl %edx, %ecx, %eax
+; X86-BMI2-NEXT:    testb $32, %dl
+; X86-BMI2-NEXT:    je .LBB21_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %eax, %ecx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB21_2:
+; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_b1_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -1499,18 +1499,18 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andq %rdi, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_b1_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrq %rsi, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bzhi64_b1_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_b1_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bzhi64_b1_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    retq
   %conv = zext i8 %numlowbits to i64
   %notmask = shl i64 -1, %conv
   %mask = xor i64 %notmask, -1
@@ -1545,42 +1545,42 @@ define i64 @bzhi64_b2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_b2_load:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
-; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    shll %cl, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB22_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB22_2:
-; X86-BMI1NOTBM-NEXT:    andnl (%edx), %eax, %eax
-; X86-BMI1NOTBM-NEXT:    andnl 4(%edx), %esi, %edx
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_b2_load:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movl $-1, %edx
-; X86-BMI1BMI2-NEXT:    shlxl %ebx, %edx, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %bl
-; X86-BMI1BMI2-NEXT:    je .LBB22_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB22_2:
-; X86-BMI1BMI2-NEXT:    andnl (%ecx), %eax, %eax
-; X86-BMI1BMI2-NEXT:    andnl 4(%ecx), %edx, %edx
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_b2_load:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %esi
+; X86-BMI1-NEXT:    movl $-1, %eax
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB22_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %esi
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:  .LBB22_2:
+; X86-BMI1-NEXT:    andnl (%edx), %eax, %eax
+; X86-BMI1-NEXT:    andnl 4(%edx), %esi, %edx
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_b2_load:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movl $-1, %edx
+; X86-BMI2-NEXT:    shlxl %ebx, %edx, %eax
+; X86-BMI2-NEXT:    testb $32, %bl
+; X86-BMI2-NEXT:    je .LBB22_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB22_2:
+; X86-BMI2-NEXT:    andnl (%ecx), %eax, %eax
+; X86-BMI2-NEXT:    andnl 4(%ecx), %edx, %edx
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_b2_load:
 ; X64-NOBMI:       # %bb.0:
@@ -1592,16 +1592,16 @@ define i64 @bzhi64_b2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andq (%rdi), %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_b2_load:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrq %rsi, (%rdi), %rax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_b2_load:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi64_b2_load:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi64_b2_load:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
+; X64-BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %notmask = shl i64 -1, %numlowbits
   %mask = xor i64 %notmask, -1
@@ -1636,42 +1636,42 @@ define i64 @bzhi64_b3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_b3_load_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
-; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    shll %cl, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB23_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB23_2:
-; X86-BMI1NOTBM-NEXT:    andnl (%edx), %eax, %eax
-; X86-BMI1NOTBM-NEXT:    andnl 4(%edx), %esi, %edx
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_b3_load_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movl $-1, %edx
-; X86-BMI1BMI2-NEXT:    shlxl %ebx, %edx, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %bl
-; X86-BMI1BMI2-NEXT:    je .LBB23_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB23_2:
-; X86-BMI1BMI2-NEXT:    andnl (%ecx), %eax, %eax
-; X86-BMI1BMI2-NEXT:    andnl 4(%ecx), %edx, %edx
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_b3_load_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %esi
+; X86-BMI1-NEXT:    movl $-1, %eax
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB23_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %esi
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:  .LBB23_2:
+; X86-BMI1-NEXT:    andnl (%edx), %eax, %eax
+; X86-BMI1-NEXT:    andnl 4(%edx), %esi, %edx
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_b3_load_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movl $-1, %edx
+; X86-BMI2-NEXT:    shlxl %ebx, %edx, %eax
+; X86-BMI2-NEXT:    testb $32, %bl
+; X86-BMI2-NEXT:    je .LBB23_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB23_2:
+; X86-BMI2-NEXT:    andnl (%ecx), %eax, %eax
+; X86-BMI2-NEXT:    andnl 4(%ecx), %edx, %edx
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_b3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -1683,18 +1683,18 @@ define i64 @bzhi64_b3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andq (%rdi), %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_b3_load_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrq %rsi, (%rdi), %rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bzhi64_b3_load_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_b3_load_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bzhi64_b3_load_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
+; X64-BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %conv = zext i8 %numlowbits to i64
   %notmask = shl i64 -1, %conv
@@ -1727,36 +1727,36 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_b4_commutative:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %edx
-; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    shll %cl, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB24_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:    xorl %eax, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB24_2:
-; X86-BMI1NOTBM-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
-; X86-BMI1NOTBM-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %edx
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_b4_commutative:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
-; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI1BMI2-NEXT:    shlxl %edx, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %dl
-; X86-BMI1BMI2-NEXT:    je .LBB24_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %eax, %ecx
-; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB24_2:
-; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
-; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_b4_commutative:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %edx
+; X86-BMI1-NEXT:    movl $-1, %eax
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB24_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:    xorl %eax, %eax
+; X86-BMI1-NEXT:  .LBB24_2:
+; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %edx
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_b4_commutative:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI2-NEXT:    shlxl %edx, %ecx, %eax
+; X86-BMI2-NEXT:    testb $32, %dl
+; X86-BMI2-NEXT:    je .LBB24_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %eax, %ecx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB24_2:
+; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_b4_commutative:
 ; X64-NOBMI:       # %bb.0:
@@ -1768,16 +1768,16 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andq %rdi, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_b4_commutative:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrq %rsi, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_b4_commutative:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi64_b4_commutative:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi64_b4_commutative:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    retq
   %notmask = shl i64 -1, %numlowbits
   %mask = xor i64 %notmask, -1
   %masked = and i64 %val, %mask ; swapped order
@@ -1803,32 +1803,32 @@ define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_32_b0:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    shll %cl, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB25_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB25_2:
-; X86-BMI1NOTBM-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_32_b0:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    xorl %ecx, %ecx
-; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    jne .LBB25_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI1BMI2-NEXT:    shlxl %eax, %ecx, %ecx
-; X86-BMI1BMI2-NEXT:  .LBB25_2:
-; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_32_b0:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %eax
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    xorl %edx, %edx
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB25_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:  .LBB25_2:
+; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_32_b0:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    xorl %ecx, %ecx
+; X86-BMI2-NEXT:    testb $32, %al
+; X86-BMI2-NEXT:    jne .LBB25_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI2-NEXT:    shlxl %eax, %ecx, %ecx
+; X86-BMI2-NEXT:  .LBB25_2:
+; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_32_b0:
 ; X64-NOBMI:       # %bb.0:
@@ -1841,16 +1841,16 @@ define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_32_b0:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_32_b0:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi64_32_b0:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi64_32_b0:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
   %widenumlowbits = zext i8 %numlowbits to i64
   %notmask = shl nsw i64 -1, %widenumlowbits
   %mask = xor i64 %notmask, -1
@@ -1870,18 +1870,18 @@ define i32 @bzhi64_32_b1(i64 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_32_b1:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_32_b1:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI1BMI2-LABEL: bzhi64_32_b1:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI2-LABEL: bzhi64_32_b1:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_32_b1:
 ; X64-NOBMI:       # %bb.0:
@@ -1893,16 +1893,16 @@ define i32 @bzhi64_32_b1(i64 %val, i8 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_32_b1:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_32_b1:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi64_32_b1:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi64_32_b1:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
   %truncval = trunc i64 %val to i32
   %widenumlowbits = zext i8 %numlowbits to i32
   %notmask = shl nsw i32 -1, %widenumlowbits
@@ -1923,18 +1923,18 @@ define i32 @bzhi64_32_b2(i64 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_32_b2:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_32_b2:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI1BMI2-LABEL: bzhi64_32_b2:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI2-LABEL: bzhi64_32_b2:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_32_b2:
 ; X64-NOBMI:       # %bb.0:
@@ -1946,16 +1946,16 @@ define i32 @bzhi64_32_b2(i64 %val, i8 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    andl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_32_b2:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_32_b2:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi64_32_b2:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi64_32_b2:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
   %widenumlowbits = zext i8 %numlowbits to i32
   %notmask = shl nsw i32 -1, %widenumlowbits
   %mask = xor i32 %notmask, -1
@@ -1983,32 +1983,32 @@ define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_32_b3:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    shll %cl, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB28_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB28_2:
-; X86-BMI1NOTBM-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_32_b3:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    xorl %ecx, %ecx
-; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    jne .LBB28_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI1BMI2-NEXT:    shlxl %eax, %ecx, %ecx
-; X86-BMI1BMI2-NEXT:  .LBB28_2:
-; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_32_b3:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %eax
+; X86-BMI1-NEXT:    shll %cl, %eax
+; X86-BMI1-NEXT:    xorl %edx, %edx
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB28_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:  .LBB28_2:
+; X86-BMI1-NEXT:    andnl {{[0-9]+}}(%esp), %edx, %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_32_b3:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    xorl %ecx, %ecx
+; X86-BMI2-NEXT:    testb $32, %al
+; X86-BMI2-NEXT:    jne .LBB28_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI2-NEXT:    shlxl %eax, %ecx, %ecx
+; X86-BMI2-NEXT:  .LBB28_2:
+; X86-BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_32_b3:
 ; X64-NOBMI:       # %bb.0:
@@ -2022,16 +2022,16 @@ define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_32_b3:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_32_b3:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi64_32_b3:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi64_32_b3:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
   %widenumlowbits = zext i8 %numlowbits to i64
   %notmask = shl nsw i64 4294967295, %widenumlowbits
   %mask = xor i64 %notmask, 4294967295
@@ -2064,38 +2064,38 @@ define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi32_c0:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
-; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
-; X86-BMI1NOTBM-NEXT:    movl %esi, (%esp)
-; X86-BMI1NOTBM-NEXT:    calll use32@PLT
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    addl $8, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi32_c0:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X86-BMI1BMI2-NEXT:    negb %al
-; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
-; X86-BMI1BMI2-NEXT:    calll use32@PLT
-; X86-BMI1BMI2-NEXT:    bzhil %ebx, {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    addl $8, %esp
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi32_c0:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    subl $8, %esp
+; X86-BMI1-NEXT:    xorl %ecx, %ecx
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %esi
+; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI1-NEXT:    shrl %cl, %esi
+; X86-BMI1-NEXT:    movl %esi, (%esp)
+; X86-BMI1-NEXT:    calll use32@PLT
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    addl $8, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi32_c0:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    subl $8, %esp
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movl %ebx, %eax
+; X86-BMI2-NEXT:    negb %al
+; X86-BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    movl %eax, (%esp)
+; X86-BMI2-NEXT:    calll use32@PLT
+; X86-BMI2-NEXT:    bzhil %ebx, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    addl $8, %esp
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_c0:
 ; X64-NOBMI:       # %bb.0:
@@ -2117,43 +2117,43 @@ define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    popq %rbp
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi32_c0:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %rbp
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    pushq %rax
-; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
-; X64-BMI1NOTBM-NEXT:    movl %edi, %ebx
-; X64-BMI1NOTBM-NEXT:    negb %cl
-; X64-BMI1NOTBM-NEXT:    movl $-1, %ebp
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebp
-; X64-BMI1NOTBM-NEXT:    movl %ebp, %edi
-; X64-BMI1NOTBM-NEXT:    callq use32@PLT
-; X64-BMI1NOTBM-NEXT:    andl %ebx, %ebp
-; X64-BMI1NOTBM-NEXT:    movl %ebp, %eax
-; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    popq %rbp
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bzhi32_c0:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %rbp
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    pushq %rax
-; X64-BMI1BMI2-NEXT:    movl %esi, %ebx
-; X64-BMI1BMI2-NEXT:    movl %edi, %ebp
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
-; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %edi
-; X64-BMI1BMI2-NEXT:    callq use32@PLT
-; X64-BMI1BMI2-NEXT:    bzhil %ebx, %ebp, %eax
-; X64-BMI1BMI2-NEXT:    addq $8, %rsp
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    popq %rbp
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bzhi32_c0:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %rbp
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    pushq %rax
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    movl %edi, %ebx
+; X64-BMI1-NEXT:    negb %cl
+; X64-BMI1-NEXT:    movl $-1, %ebp
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shrl %cl, %ebp
+; X64-BMI1-NEXT:    movl %ebp, %edi
+; X64-BMI1-NEXT:    callq use32@PLT
+; X64-BMI1-NEXT:    andl %ebx, %ebp
+; X64-BMI1-NEXT:    movl %ebp, %eax
+; X64-BMI1-NEXT:    addq $8, %rsp
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    popq %rbp
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bzhi32_c0:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %rbp
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    pushq %rax
+; X64-BMI2-NEXT:    movl %esi, %ebx
+; X64-BMI2-NEXT:    movl %edi, %ebp
+; X64-BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI2-NEXT:    negb %al
+; X64-BMI2-NEXT:    movl $-1, %ecx
+; X64-BMI2-NEXT:    shrxl %eax, %ecx, %edi
+; X64-BMI2-NEXT:    callq use32@PLT
+; X64-BMI2-NEXT:    bzhil %ebx, %ebp, %eax
+; X64-BMI2-NEXT:    addq $8, %rsp
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    popq %rbp
+; X64-BMI2-NEXT:    retq
   %numhighbits = sub i32 32, %numlowbits
   %mask = lshr i32 -1, %numhighbits
   call void @use32(i32 %mask)
@@ -2179,38 +2179,38 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi32_c1_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
-; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
-; X86-BMI1NOTBM-NEXT:    movl %esi, (%esp)
-; X86-BMI1NOTBM-NEXT:    calll use32@PLT
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    addl $8, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi32_c1_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X86-BMI1BMI2-NEXT:    negb %al
-; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
-; X86-BMI1BMI2-NEXT:    calll use32@PLT
-; X86-BMI1BMI2-NEXT:    bzhil %ebx, {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    addl $8, %esp
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi32_c1_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    subl $8, %esp
+; X86-BMI1-NEXT:    xorl %ecx, %ecx
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %esi
+; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI1-NEXT:    shrl %cl, %esi
+; X86-BMI1-NEXT:    movl %esi, (%esp)
+; X86-BMI1-NEXT:    calll use32@PLT
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    addl $8, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi32_c1_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    subl $8, %esp
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movl %ebx, %eax
+; X86-BMI2-NEXT:    negb %al
+; X86-BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    movl %eax, (%esp)
+; X86-BMI2-NEXT:    calll use32@PLT
+; X86-BMI2-NEXT:    bzhil %ebx, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    addl $8, %esp
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_c1_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -2232,43 +2232,43 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    popq %rbp
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi32_c1_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %rbp
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    pushq %rax
-; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
-; X64-BMI1NOTBM-NEXT:    movl %edi, %ebx
-; X64-BMI1NOTBM-NEXT:    negb %cl
-; X64-BMI1NOTBM-NEXT:    movl $-1, %ebp
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebp
-; X64-BMI1NOTBM-NEXT:    movl %ebp, %edi
-; X64-BMI1NOTBM-NEXT:    callq use32@PLT
-; X64-BMI1NOTBM-NEXT:    andl %ebx, %ebp
-; X64-BMI1NOTBM-NEXT:    movl %ebp, %eax
-; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    popq %rbp
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bzhi32_c1_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %rbp
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    pushq %rax
-; X64-BMI1BMI2-NEXT:    movl %esi, %ebx
-; X64-BMI1BMI2-NEXT:    movl %edi, %ebp
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
-; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %edi
-; X64-BMI1BMI2-NEXT:    callq use32@PLT
-; X64-BMI1BMI2-NEXT:    bzhil %ebx, %ebp, %eax
-; X64-BMI1BMI2-NEXT:    addq $8, %rsp
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    popq %rbp
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bzhi32_c1_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %rbp
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    pushq %rax
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    movl %edi, %ebx
+; X64-BMI1-NEXT:    negb %cl
+; X64-BMI1-NEXT:    movl $-1, %ebp
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shrl %cl, %ebp
+; X64-BMI1-NEXT:    movl %ebp, %edi
+; X64-BMI1-NEXT:    callq use32@PLT
+; X64-BMI1-NEXT:    andl %ebx, %ebp
+; X64-BMI1-NEXT:    movl %ebp, %eax
+; X64-BMI1-NEXT:    addq $8, %rsp
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    popq %rbp
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bzhi32_c1_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %rbp
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    pushq %rax
+; X64-BMI2-NEXT:    movl %esi, %ebx
+; X64-BMI2-NEXT:    movl %edi, %ebp
+; X64-BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI2-NEXT:    negb %al
+; X64-BMI2-NEXT:    movl $-1, %ecx
+; X64-BMI2-NEXT:    shrxl %eax, %ecx, %edi
+; X64-BMI2-NEXT:    callq use32@PLT
+; X64-BMI2-NEXT:    bzhil %ebx, %ebp, %eax
+; X64-BMI2-NEXT:    addq $8, %rsp
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    popq %rbp
+; X64-BMI2-NEXT:    retq
   %numhighbits = sub i8 32, %numlowbits
   %sh_prom = zext i8 %numhighbits to i32
   %mask = lshr i32 -1, %sh_prom
@@ -2297,42 +2297,42 @@ define i32 @bzhi32_c2_load(i32* %w, i32 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi32_c2_load:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %edx
-; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    movl (%eax), %esi
-; X86-BMI1NOTBM-NEXT:    andl %edx, %esi
-; X86-BMI1NOTBM-NEXT:    movl %edx, (%esp)
-; X86-BMI1NOTBM-NEXT:    calll use32@PLT
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    addl $8, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi32_c2_load:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %esi
-; X86-BMI1BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx def $ecx
-; X86-BMI1BMI2-NEXT:    negb %cl
-; X86-BMI1BMI2-NEXT:    movl $-1, %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %eax
-; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
-; X86-BMI1BMI2-NEXT:    calll use32@PLT
-; X86-BMI1BMI2-NEXT:    movl %esi, %eax
-; X86-BMI1BMI2-NEXT:    addl $8, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi32_c2_load:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    subl $8, %esp
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    xorl %ecx, %ecx
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %edx
+; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI1-NEXT:    shrl %cl, %edx
+; X86-BMI1-NEXT:    movl (%eax), %esi
+; X86-BMI1-NEXT:    andl %edx, %esi
+; X86-BMI1-NEXT:    movl %edx, (%esp)
+; X86-BMI1-NEXT:    calll use32@PLT
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    addl $8, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi32_c2_load:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    subl $8, %esp
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %esi
+; X86-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx def $ecx
+; X86-BMI2-NEXT:    negb %cl
+; X86-BMI2-NEXT:    movl $-1, %eax
+; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
+; X86-BMI2-NEXT:    movl %eax, (%esp)
+; X86-BMI2-NEXT:    calll use32@PLT
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    addl $8, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_c2_load:
 ; X64-NOBMI:       # %bb.0:
@@ -2350,34 +2350,34 @@ define i32 @bzhi32_c2_load(i32* %w, i32 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    popq %rbx
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi32_c2_load:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
-; X64-BMI1NOTBM-NEXT:    negb %cl
-; X64-BMI1NOTBM-NEXT:    movl $-1, %eax
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-BMI1NOTBM-NEXT:    shrl %cl, %eax
-; X64-BMI1NOTBM-NEXT:    movl (%rdi), %ebx
-; X64-BMI1NOTBM-NEXT:    andl %eax, %ebx
-; X64-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X64-BMI1NOTBM-NEXT:    callq use32@PLT
-; X64-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bzhi32_c2_load:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %ebx
-; X64-BMI1BMI2-NEXT:    # kill: def $sil killed $sil killed $esi def $esi
-; X64-BMI1BMI2-NEXT:    negb %sil
-; X64-BMI1BMI2-NEXT:    movl $-1, %eax
-; X64-BMI1BMI2-NEXT:    shrxl %esi, %eax, %edi
-; X64-BMI1BMI2-NEXT:    callq use32@PLT
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bzhi32_c2_load:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    negb %cl
+; X64-BMI1-NEXT:    movl $-1, %eax
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shrl %cl, %eax
+; X64-BMI1-NEXT:    movl (%rdi), %ebx
+; X64-BMI1-NEXT:    andl %eax, %ebx
+; X64-BMI1-NEXT:    movl %eax, %edi
+; X64-BMI1-NEXT:    callq use32@PLT
+; X64-BMI1-NEXT:    movl %ebx, %eax
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bzhi32_c2_load:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %ebx
+; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $esi def $esi
+; X64-BMI2-NEXT:    negb %sil
+; X64-BMI2-NEXT:    movl $-1, %eax
+; X64-BMI2-NEXT:    shrxl %esi, %eax, %edi
+; X64-BMI2-NEXT:    callq use32@PLT
+; X64-BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %numhighbits = sub i32 32, %numlowbits
   %mask = lshr i32 -1, %numhighbits
@@ -2406,42 +2406,42 @@ define i32 @bzhi32_c3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi32_c3_load_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %edx
-; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    movl (%eax), %esi
-; X86-BMI1NOTBM-NEXT:    andl %edx, %esi
-; X86-BMI1NOTBM-NEXT:    movl %edx, (%esp)
-; X86-BMI1NOTBM-NEXT:    calll use32@PLT
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    addl $8, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi32_c3_load_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %esi
-; X86-BMI1BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx def $ecx
-; X86-BMI1BMI2-NEXT:    negb %cl
-; X86-BMI1BMI2-NEXT:    movl $-1, %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %eax
-; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
-; X86-BMI1BMI2-NEXT:    calll use32@PLT
-; X86-BMI1BMI2-NEXT:    movl %esi, %eax
-; X86-BMI1BMI2-NEXT:    addl $8, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi32_c3_load_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    subl $8, %esp
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    xorl %ecx, %ecx
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %edx
+; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI1-NEXT:    shrl %cl, %edx
+; X86-BMI1-NEXT:    movl (%eax), %esi
+; X86-BMI1-NEXT:    andl %edx, %esi
+; X86-BMI1-NEXT:    movl %edx, (%esp)
+; X86-BMI1-NEXT:    calll use32@PLT
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    addl $8, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi32_c3_load_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    subl $8, %esp
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %esi
+; X86-BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx def $ecx
+; X86-BMI2-NEXT:    negb %cl
+; X86-BMI2-NEXT:    movl $-1, %eax
+; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
+; X86-BMI2-NEXT:    movl %eax, (%esp)
+; X86-BMI2-NEXT:    calll use32@PLT
+; X86-BMI2-NEXT:    movl %esi, %eax
+; X86-BMI2-NEXT:    addl $8, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_c3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -2459,34 +2459,34 @@ define i32 @bzhi32_c3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    popq %rbx
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi32_c3_load_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
-; X64-BMI1NOTBM-NEXT:    negb %cl
-; X64-BMI1NOTBM-NEXT:    movl $-1, %eax
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-BMI1NOTBM-NEXT:    shrl %cl, %eax
-; X64-BMI1NOTBM-NEXT:    movl (%rdi), %ebx
-; X64-BMI1NOTBM-NEXT:    andl %eax, %ebx
-; X64-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X64-BMI1NOTBM-NEXT:    callq use32@PLT
-; X64-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bzhi32_c3_load_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %ebx
-; X64-BMI1BMI2-NEXT:    # kill: def $sil killed $sil killed $esi def $esi
-; X64-BMI1BMI2-NEXT:    negb %sil
-; X64-BMI1BMI2-NEXT:    movl $-1, %eax
-; X64-BMI1BMI2-NEXT:    shrxl %esi, %eax, %edi
-; X64-BMI1BMI2-NEXT:    callq use32@PLT
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bzhi32_c3_load_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    negb %cl
+; X64-BMI1-NEXT:    movl $-1, %eax
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shrl %cl, %eax
+; X64-BMI1-NEXT:    movl (%rdi), %ebx
+; X64-BMI1-NEXT:    andl %eax, %ebx
+; X64-BMI1-NEXT:    movl %eax, %edi
+; X64-BMI1-NEXT:    callq use32@PLT
+; X64-BMI1-NEXT:    movl %ebx, %eax
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bzhi32_c3_load_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %ebx
+; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $esi def $esi
+; X64-BMI2-NEXT:    negb %sil
+; X64-BMI2-NEXT:    movl $-1, %eax
+; X64-BMI2-NEXT:    shrxl %esi, %eax, %edi
+; X64-BMI2-NEXT:    callq use32@PLT
+; X64-BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %numhighbits = sub i8 32, %numlowbits
   %sh_prom = zext i8 %numhighbits to i32
@@ -2514,38 +2514,38 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi32_c4_commutative:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
-; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
-; X86-BMI1NOTBM-NEXT:    movl %esi, (%esp)
-; X86-BMI1NOTBM-NEXT:    calll use32@PLT
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    addl $8, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi32_c4_commutative:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X86-BMI1BMI2-NEXT:    negb %al
-; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
-; X86-BMI1BMI2-NEXT:    calll use32@PLT
-; X86-BMI1BMI2-NEXT:    bzhil %ebx, {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    addl $8, %esp
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi32_c4_commutative:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    subl $8, %esp
+; X86-BMI1-NEXT:    xorl %ecx, %ecx
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %esi
+; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI1-NEXT:    shrl %cl, %esi
+; X86-BMI1-NEXT:    movl %esi, (%esp)
+; X86-BMI1-NEXT:    calll use32@PLT
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    addl $8, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi32_c4_commutative:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    subl $8, %esp
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movl %ebx, %eax
+; X86-BMI2-NEXT:    negb %al
+; X86-BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    movl %eax, (%esp)
+; X86-BMI2-NEXT:    calll use32@PLT
+; X86-BMI2-NEXT:    bzhil %ebx, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    addl $8, %esp
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_c4_commutative:
 ; X64-NOBMI:       # %bb.0:
@@ -2567,43 +2567,43 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    popq %rbp
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi32_c4_commutative:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %rbp
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    pushq %rax
-; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
-; X64-BMI1NOTBM-NEXT:    movl %edi, %ebx
-; X64-BMI1NOTBM-NEXT:    negb %cl
-; X64-BMI1NOTBM-NEXT:    movl $-1, %ebp
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebp
-; X64-BMI1NOTBM-NEXT:    movl %ebp, %edi
-; X64-BMI1NOTBM-NEXT:    callq use32@PLT
-; X64-BMI1NOTBM-NEXT:    andl %ebx, %ebp
-; X64-BMI1NOTBM-NEXT:    movl %ebp, %eax
-; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    popq %rbp
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bzhi32_c4_commutative:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %rbp
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    pushq %rax
-; X64-BMI1BMI2-NEXT:    movl %esi, %ebx
-; X64-BMI1BMI2-NEXT:    movl %edi, %ebp
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
-; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %edi
-; X64-BMI1BMI2-NEXT:    callq use32@PLT
-; X64-BMI1BMI2-NEXT:    bzhil %ebx, %ebp, %eax
-; X64-BMI1BMI2-NEXT:    addq $8, %rsp
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    popq %rbp
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bzhi32_c4_commutative:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %rbp
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    pushq %rax
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    movl %edi, %ebx
+; X64-BMI1-NEXT:    negb %cl
+; X64-BMI1-NEXT:    movl $-1, %ebp
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shrl %cl, %ebp
+; X64-BMI1-NEXT:    movl %ebp, %edi
+; X64-BMI1-NEXT:    callq use32@PLT
+; X64-BMI1-NEXT:    andl %ebx, %ebp
+; X64-BMI1-NEXT:    movl %ebp, %eax
+; X64-BMI1-NEXT:    addq $8, %rsp
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    popq %rbp
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bzhi32_c4_commutative:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %rbp
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    pushq %rax
+; X64-BMI2-NEXT:    movl %esi, %ebx
+; X64-BMI2-NEXT:    movl %edi, %ebp
+; X64-BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI2-NEXT:    negb %al
+; X64-BMI2-NEXT:    movl $-1, %ecx
+; X64-BMI2-NEXT:    shrxl %eax, %ecx, %edi
+; X64-BMI2-NEXT:    callq use32@PLT
+; X64-BMI2-NEXT:    bzhil %ebx, %ebp, %eax
+; X64-BMI2-NEXT:    addq $8, %rsp
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    popq %rbp
+; X64-BMI2-NEXT:    retq
   %numhighbits = sub i32 32, %numlowbits
   %mask = lshr i32 -1, %numhighbits
   call void @use32(i32 %mask)
@@ -2646,64 +2646,64 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_c0:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    pushl %eax
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
-; X86-BMI1NOTBM-NEXT:    movl $-1, %edi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB34_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB34_2:
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    calll use64@PLT
-; X86-BMI1NOTBM-NEXT:    addl $16, %esp
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    addl $4, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_c0:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    pushl %eax
-; X86-BMI1BMI2-NEXT:    movb $64, %al
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movl $-1, %edi
-; X86-BMI1BMI2-NEXT:    shrxl %eax, %edi, %esi
-; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    je .LBB34_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %esi, %edi
-; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB34_2:
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    calll use64@PLT
-; X86-BMI1BMI2-NEXT:    addl $16, %esp
-; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edi
-; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:    movl %esi, %edx
-; X86-BMI1BMI2-NEXT:    addl $4, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_c0:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    pushl %eax
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %esi
+; X86-BMI1-NEXT:    movl $-1, %edi
+; X86-BMI1-NEXT:    shrl %cl, %edi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB34_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %edi, %esi
+; X86-BMI1-NEXT:    xorl %edi, %edi
+; X86-BMI1-NEXT:  .LBB34_2:
+; X86-BMI1-NEXT:    subl $8, %esp
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    calll use64@PLT
+; X86-BMI1-NEXT:    addl $16, %esp
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    addl $4, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_c0:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    pushl %eax
+; X86-BMI2-NEXT:    movb $64, %al
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl $-1, %edi
+; X86-BMI2-NEXT:    shrxl %eax, %edi, %esi
+; X86-BMI2-NEXT:    testb $32, %al
+; X86-BMI2-NEXT:    je .LBB34_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %esi, %edi
+; X86-BMI2-NEXT:    xorl %esi, %esi
+; X86-BMI2-NEXT:  .LBB34_2:
+; X86-BMI2-NEXT:    subl $8, %esp
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    calll use64@PLT
+; X86-BMI2-NEXT:    addl $16, %esp
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl %edi, %eax
+; X86-BMI2-NEXT:    movl %esi, %edx
+; X86-BMI2-NEXT:    addl $4, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_c0:
 ; X64-NOBMI:       # %bb.0:
@@ -2725,43 +2725,43 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    popq %r14
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_c0:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %r14
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    pushq %rax
-; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
-; X64-BMI1NOTBM-NEXT:    movq %rdi, %r14
-; X64-BMI1NOTBM-NEXT:    negb %cl
-; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rdi
-; X64-BMI1NOTBM-NEXT:    callq use64@PLT
-; X64-BMI1NOTBM-NEXT:    andq %r14, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
-; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    popq %r14
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bzhi64_c0:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %r14
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    pushq %rax
-; X64-BMI1BMI2-NEXT:    movq %rsi, %rbx
-; X64-BMI1BMI2-NEXT:    movq %rdi, %r14
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
-; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
-; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
-; X64-BMI1BMI2-NEXT:    callq use64@PLT
-; X64-BMI1BMI2-NEXT:    bzhiq %rbx, %r14, %rax
-; X64-BMI1BMI2-NEXT:    addq $8, %rsp
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    popq %r14
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_c0:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %r14
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    pushq %rax
+; X64-BMI1-NEXT:    movq %rsi, %rcx
+; X64-BMI1-NEXT:    movq %rdi, %r14
+; X64-BMI1-NEXT:    negb %cl
+; X64-BMI1-NEXT:    movq $-1, %rbx
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1-NEXT:    shrq %cl, %rbx
+; X64-BMI1-NEXT:    movq %rbx, %rdi
+; X64-BMI1-NEXT:    callq use64@PLT
+; X64-BMI1-NEXT:    andq %r14, %rbx
+; X64-BMI1-NEXT:    movq %rbx, %rax
+; X64-BMI1-NEXT:    addq $8, %rsp
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    popq %r14
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bzhi64_c0:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %r14
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    pushq %rax
+; X64-BMI2-NEXT:    movq %rsi, %rbx
+; X64-BMI2-NEXT:    movq %rdi, %r14
+; X64-BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI2-NEXT:    negb %al
+; X64-BMI2-NEXT:    movq $-1, %rcx
+; X64-BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI2-NEXT:    callq use64@PLT
+; X64-BMI2-NEXT:    bzhiq %rbx, %r14, %rax
+; X64-BMI2-NEXT:    addq $8, %rsp
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    popq %r14
+; X64-BMI2-NEXT:    retq
   %numhighbits = sub i64 64, %numlowbits
   %mask = lshr i64 -1, %numhighbits
   call void @use64(i64 %mask)
@@ -2800,64 +2800,64 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_c1_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    pushl %eax
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
-; X86-BMI1NOTBM-NEXT:    movl $-1, %edi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB35_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB35_2:
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    calll use64@PLT
-; X86-BMI1NOTBM-NEXT:    addl $16, %esp
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    addl $4, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_c1_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    pushl %eax
-; X86-BMI1BMI2-NEXT:    movb $64, %al
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movl $-1, %edi
-; X86-BMI1BMI2-NEXT:    shrxl %eax, %edi, %esi
-; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    je .LBB35_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %esi, %edi
-; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB35_2:
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    calll use64@PLT
-; X86-BMI1BMI2-NEXT:    addl $16, %esp
-; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edi
-; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:    movl %esi, %edx
-; X86-BMI1BMI2-NEXT:    addl $4, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_c1_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    pushl %eax
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %esi
+; X86-BMI1-NEXT:    movl $-1, %edi
+; X86-BMI1-NEXT:    shrl %cl, %edi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB35_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %edi, %esi
+; X86-BMI1-NEXT:    xorl %edi, %edi
+; X86-BMI1-NEXT:  .LBB35_2:
+; X86-BMI1-NEXT:    subl $8, %esp
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    calll use64@PLT
+; X86-BMI1-NEXT:    addl $16, %esp
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    addl $4, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_c1_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    pushl %eax
+; X86-BMI2-NEXT:    movb $64, %al
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl $-1, %edi
+; X86-BMI2-NEXT:    shrxl %eax, %edi, %esi
+; X86-BMI2-NEXT:    testb $32, %al
+; X86-BMI2-NEXT:    je .LBB35_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %esi, %edi
+; X86-BMI2-NEXT:    xorl %esi, %esi
+; X86-BMI2-NEXT:  .LBB35_2:
+; X86-BMI2-NEXT:    subl $8, %esp
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    calll use64@PLT
+; X86-BMI2-NEXT:    addl $16, %esp
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl %edi, %eax
+; X86-BMI2-NEXT:    movl %esi, %edx
+; X86-BMI2-NEXT:    addl $4, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_c1_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -2879,43 +2879,43 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    popq %r14
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_c1_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %r14
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    pushq %rax
-; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
-; X64-BMI1NOTBM-NEXT:    movq %rdi, %r14
-; X64-BMI1NOTBM-NEXT:    negb %cl
-; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rdi
-; X64-BMI1NOTBM-NEXT:    callq use64@PLT
-; X64-BMI1NOTBM-NEXT:    andq %r14, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
-; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    popq %r14
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bzhi64_c1_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %r14
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    pushq %rax
-; X64-BMI1BMI2-NEXT:    movl %esi, %ebx
-; X64-BMI1BMI2-NEXT:    movq %rdi, %r14
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
-; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
-; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
-; X64-BMI1BMI2-NEXT:    callq use64@PLT
-; X64-BMI1BMI2-NEXT:    bzhiq %rbx, %r14, %rax
-; X64-BMI1BMI2-NEXT:    addq $8, %rsp
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    popq %r14
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_c1_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %r14
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    pushq %rax
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    movq %rdi, %r14
+; X64-BMI1-NEXT:    negb %cl
+; X64-BMI1-NEXT:    movq $-1, %rbx
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shrq %cl, %rbx
+; X64-BMI1-NEXT:    movq %rbx, %rdi
+; X64-BMI1-NEXT:    callq use64@PLT
+; X64-BMI1-NEXT:    andq %r14, %rbx
+; X64-BMI1-NEXT:    movq %rbx, %rax
+; X64-BMI1-NEXT:    addq $8, %rsp
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    popq %r14
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bzhi64_c1_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %r14
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    pushq %rax
+; X64-BMI2-NEXT:    movl %esi, %ebx
+; X64-BMI2-NEXT:    movq %rdi, %r14
+; X64-BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI2-NEXT:    negb %al
+; X64-BMI2-NEXT:    movq $-1, %rcx
+; X64-BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI2-NEXT:    callq use64@PLT
+; X64-BMI2-NEXT:    bzhiq %rbx, %r14, %rax
+; X64-BMI2-NEXT:    addq $8, %rsp
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    popq %r14
+; X64-BMI2-NEXT:    retq
   %numhighbits = sub i8 64, %numlowbits
   %sh_prom = zext i8 %numhighbits to i64
   %mask = lshr i64 -1, %sh_prom
@@ -2958,70 +2958,70 @@ define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %ebx
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_c2_load:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB36_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB36_2:
-; X86-BMI1NOTBM-NEXT:    movl 4(%edx), %esi
-; X86-BMI1NOTBM-NEXT:    andl %ebx, %esi
-; X86-BMI1NOTBM-NEXT:    movl (%edx), %edi
-; X86-BMI1NOTBM-NEXT:    andl %eax, %edi
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %eax
-; X86-BMI1NOTBM-NEXT:    calll use64@PLT
-; X86-BMI1NOTBM-NEXT:    addl $16, %esp
-; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_c2_load:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movb $64, %bl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI1BMI2-NEXT:    shrxl %ebx, %ecx, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %bl
-; X86-BMI1BMI2-NEXT:    je .LBB36_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edx, %ecx
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:  .LBB36_2:
-; X86-BMI1BMI2-NEXT:    movl 4(%eax), %esi
-; X86-BMI1BMI2-NEXT:    andl %edx, %esi
-; X86-BMI1BMI2-NEXT:    movl (%eax), %edi
-; X86-BMI1BMI2-NEXT:    andl %ecx, %edi
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    pushl %edx
-; X86-BMI1BMI2-NEXT:    pushl %ecx
-; X86-BMI1BMI2-NEXT:    calll use64@PLT
-; X86-BMI1BMI2-NEXT:    addl $16, %esp
-; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:    movl %esi, %edx
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_c2_load:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %eax
+; X86-BMI1-NEXT:    movl $-1, %ebx
+; X86-BMI1-NEXT:    shrl %cl, %ebx
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB36_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %ebx, %eax
+; X86-BMI1-NEXT:    xorl %ebx, %ebx
+; X86-BMI1-NEXT:  .LBB36_2:
+; X86-BMI1-NEXT:    movl 4(%edx), %esi
+; X86-BMI1-NEXT:    andl %ebx, %esi
+; X86-BMI1-NEXT:    movl (%edx), %edi
+; X86-BMI1-NEXT:    andl %eax, %edi
+; X86-BMI1-NEXT:    subl $8, %esp
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %eax
+; X86-BMI1-NEXT:    calll use64@PLT
+; X86-BMI1-NEXT:    addl $16, %esp
+; X86-BMI1-NEXT:    movl %edi, %eax
+; X86-BMI1-NEXT:    movl %esi, %edx
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_c2_load:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb $64, %bl
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI2-NEXT:    shrxl %ebx, %ecx, %edx
+; X86-BMI2-NEXT:    testb $32, %bl
+; X86-BMI2-NEXT:    je .LBB36_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edx, %ecx
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:  .LBB36_2:
+; X86-BMI2-NEXT:    movl 4(%eax), %esi
+; X86-BMI2-NEXT:    andl %edx, %esi
+; X86-BMI2-NEXT:    movl (%eax), %edi
+; X86-BMI2-NEXT:    andl %ecx, %edi
+; X86-BMI2-NEXT:    subl $8, %esp
+; X86-BMI2-NEXT:    pushl %edx
+; X86-BMI2-NEXT:    pushl %ecx
+; X86-BMI2-NEXT:    calll use64@PLT
+; X86-BMI2-NEXT:    addl $16, %esp
+; X86-BMI2-NEXT:    movl %edi, %eax
+; X86-BMI2-NEXT:    movl %esi, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_c2_load:
 ; X64-NOBMI:       # %bb.0:
@@ -3039,34 +3039,34 @@ define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    popq %rbx
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_c2_load:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
-; X64-BMI1NOTBM-NEXT:    negb %cl
-; X64-BMI1NOTBM-NEXT:    movq $-1, %rax
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %rax
-; X64-BMI1NOTBM-NEXT:    movq (%rdi), %rbx
-; X64-BMI1NOTBM-NEXT:    andq %rax, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rax, %rdi
-; X64-BMI1NOTBM-NEXT:    callq use64@PLT
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bzhi64_c2_load:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rbx
-; X64-BMI1BMI2-NEXT:    # kill: def $sil killed $sil killed $rsi def $rsi
-; X64-BMI1BMI2-NEXT:    negb %sil
-; X64-BMI1BMI2-NEXT:    movq $-1, %rax
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rax, %rdi
-; X64-BMI1BMI2-NEXT:    callq use64@PLT
-; X64-BMI1BMI2-NEXT:    movq %rbx, %rax
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_c2_load:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    movq %rsi, %rcx
+; X64-BMI1-NEXT:    negb %cl
+; X64-BMI1-NEXT:    movq $-1, %rax
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1-NEXT:    shrq %cl, %rax
+; X64-BMI1-NEXT:    movq (%rdi), %rbx
+; X64-BMI1-NEXT:    andq %rax, %rbx
+; X64-BMI1-NEXT:    movq %rax, %rdi
+; X64-BMI1-NEXT:    callq use64@PLT
+; X64-BMI1-NEXT:    movq %rbx, %rax
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bzhi64_c2_load:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rbx
+; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $rsi def $rsi
+; X64-BMI2-NEXT:    negb %sil
+; X64-BMI2-NEXT:    movq $-1, %rax
+; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rdi
+; X64-BMI2-NEXT:    callq use64@PLT
+; X64-BMI2-NEXT:    movq %rbx, %rax
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %numhighbits = sub i64 64, %numlowbits
   %mask = lshr i64 -1, %numhighbits
@@ -3109,70 +3109,70 @@ define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %ebx
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_c3_load_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB37_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:  .LBB37_2:
-; X86-BMI1NOTBM-NEXT:    movl 4(%edx), %esi
-; X86-BMI1NOTBM-NEXT:    andl %ebx, %esi
-; X86-BMI1NOTBM-NEXT:    movl (%edx), %edi
-; X86-BMI1NOTBM-NEXT:    andl %eax, %edi
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %eax
-; X86-BMI1NOTBM-NEXT:    calll use64@PLT
-; X86-BMI1NOTBM-NEXT:    addl $16, %esp
-; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_c3_load_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %ebx
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movb $64, %bl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %bl
-; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI1BMI2-NEXT:    shrxl %ebx, %ecx, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %bl
-; X86-BMI1BMI2-NEXT:    je .LBB37_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edx, %ecx
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:  .LBB37_2:
-; X86-BMI1BMI2-NEXT:    movl 4(%eax), %esi
-; X86-BMI1BMI2-NEXT:    andl %edx, %esi
-; X86-BMI1BMI2-NEXT:    movl (%eax), %edi
-; X86-BMI1BMI2-NEXT:    andl %ecx, %edi
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    pushl %edx
-; X86-BMI1BMI2-NEXT:    pushl %ecx
-; X86-BMI1BMI2-NEXT:    calll use64@PLT
-; X86-BMI1BMI2-NEXT:    addl $16, %esp
-; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:    movl %esi, %edx
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    popl %ebx
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_c3_load_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %eax
+; X86-BMI1-NEXT:    movl $-1, %ebx
+; X86-BMI1-NEXT:    shrl %cl, %ebx
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB37_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %ebx, %eax
+; X86-BMI1-NEXT:    xorl %ebx, %ebx
+; X86-BMI1-NEXT:  .LBB37_2:
+; X86-BMI1-NEXT:    movl 4(%edx), %esi
+; X86-BMI1-NEXT:    andl %ebx, %esi
+; X86-BMI1-NEXT:    movl (%edx), %edi
+; X86-BMI1-NEXT:    andl %eax, %edi
+; X86-BMI1-NEXT:    subl $8, %esp
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %eax
+; X86-BMI1-NEXT:    calll use64@PLT
+; X86-BMI1-NEXT:    addl $16, %esp
+; X86-BMI1-NEXT:    movl %edi, %eax
+; X86-BMI1-NEXT:    movl %esi, %edx
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_c3_load_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb $64, %bl
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI2-NEXT:    shrxl %ebx, %ecx, %edx
+; X86-BMI2-NEXT:    testb $32, %bl
+; X86-BMI2-NEXT:    je .LBB37_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edx, %ecx
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:  .LBB37_2:
+; X86-BMI2-NEXT:    movl 4(%eax), %esi
+; X86-BMI2-NEXT:    andl %edx, %esi
+; X86-BMI2-NEXT:    movl (%eax), %edi
+; X86-BMI2-NEXT:    andl %ecx, %edi
+; X86-BMI2-NEXT:    subl $8, %esp
+; X86-BMI2-NEXT:    pushl %edx
+; X86-BMI2-NEXT:    pushl %ecx
+; X86-BMI2-NEXT:    calll use64@PLT
+; X86-BMI2-NEXT:    addl $16, %esp
+; X86-BMI2-NEXT:    movl %edi, %eax
+; X86-BMI2-NEXT:    movl %esi, %edx
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_c3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -3190,35 +3190,35 @@ define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    popq %rbx
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_c3_load_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
-; X64-BMI1NOTBM-NEXT:    negb %cl
-; X64-BMI1NOTBM-NEXT:    movq $-1, %rax
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %rax
-; X64-BMI1NOTBM-NEXT:    movq (%rdi), %rbx
-; X64-BMI1NOTBM-NEXT:    andq %rax, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rax, %rdi
-; X64-BMI1NOTBM-NEXT:    callq use64@PLT
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bzhi64_c3_load_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rbx
-; X64-BMI1BMI2-NEXT:    # kill: def $sil killed $sil killed $rsi def $rsi
-; X64-BMI1BMI2-NEXT:    negb %sil
-; X64-BMI1BMI2-NEXT:    movq $-1, %rax
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rax, %rdi
-; X64-BMI1BMI2-NEXT:    callq use64@PLT
-; X64-BMI1BMI2-NEXT:    movq %rbx, %rax
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_c3_load_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    movl %esi, %ecx
+; X64-BMI1-NEXT:    negb %cl
+; X64-BMI1-NEXT:    movq $-1, %rax
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1-NEXT:    shrq %cl, %rax
+; X64-BMI1-NEXT:    movq (%rdi), %rbx
+; X64-BMI1-NEXT:    andq %rax, %rbx
+; X64-BMI1-NEXT:    movq %rax, %rdi
+; X64-BMI1-NEXT:    callq use64@PLT
+; X64-BMI1-NEXT:    movq %rbx, %rax
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bzhi64_c3_load_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rbx
+; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $rsi def $rsi
+; X64-BMI2-NEXT:    negb %sil
+; X64-BMI2-NEXT:    movq $-1, %rax
+; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rdi
+; X64-BMI2-NEXT:    callq use64@PLT
+; X64-BMI2-NEXT:    movq %rbx, %rax
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %numhighbits = sub i8 64, %numlowbits
   %sh_prom = zext i8 %numhighbits to i64
@@ -3259,64 +3259,64 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_c4_commutative:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    pushl %eax
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
-; X86-BMI1NOTBM-NEXT:    movl $-1, %edi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB38_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB38_2:
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    calll use64@PLT
-; X86-BMI1NOTBM-NEXT:    addl $16, %esp
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %edi
-; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    addl $4, %esp
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_c4_commutative:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    pushl %eax
-; X86-BMI1BMI2-NEXT:    movb $64, %al
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movl $-1, %edi
-; X86-BMI1BMI2-NEXT:    shrxl %eax, %edi, %esi
-; X86-BMI1BMI2-NEXT:    testb $32, %al
-; X86-BMI1BMI2-NEXT:    je .LBB38_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %esi, %edi
-; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI1BMI2-NEXT:  .LBB38_2:
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    calll use64@PLT
-; X86-BMI1BMI2-NEXT:    addl $16, %esp
-; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edi
-; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:    movl %esi, %edx
-; X86-BMI1BMI2-NEXT:    addl $4, %esp
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_c4_commutative:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    pushl %eax
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %esi
+; X86-BMI1-NEXT:    movl $-1, %edi
+; X86-BMI1-NEXT:    shrl %cl, %edi
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB38_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %edi, %esi
+; X86-BMI1-NEXT:    xorl %edi, %edi
+; X86-BMI1-NEXT:  .LBB38_2:
+; X86-BMI1-NEXT:    subl $8, %esp
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    calll use64@PLT
+; X86-BMI1-NEXT:    addl $16, %esp
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-BMI1-NEXT:    movl %esi, %eax
+; X86-BMI1-NEXT:    movl %edi, %edx
+; X86-BMI1-NEXT:    addl $4, %esp
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_c4_commutative:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    pushl %eax
+; X86-BMI2-NEXT:    movb $64, %al
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    movl $-1, %edi
+; X86-BMI2-NEXT:    shrxl %eax, %edi, %esi
+; X86-BMI2-NEXT:    testb $32, %al
+; X86-BMI2-NEXT:    je .LBB38_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %esi, %edi
+; X86-BMI2-NEXT:    xorl %esi, %esi
+; X86-BMI2-NEXT:  .LBB38_2:
+; X86-BMI2-NEXT:    subl $8, %esp
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    calll use64@PLT
+; X86-BMI2-NEXT:    addl $16, %esp
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movl %edi, %eax
+; X86-BMI2-NEXT:    movl %esi, %edx
+; X86-BMI2-NEXT:    addl $4, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_c4_commutative:
 ; X64-NOBMI:       # %bb.0:
@@ -3338,43 +3338,43 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    popq %r14
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_c4_commutative:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    pushq %r14
-; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    pushq %rax
-; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
-; X64-BMI1NOTBM-NEXT:    movq %rdi, %r14
-; X64-BMI1NOTBM-NEXT:    negb %cl
-; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rdi
-; X64-BMI1NOTBM-NEXT:    callq use64@PLT
-; X64-BMI1NOTBM-NEXT:    andq %r14, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
-; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
-; X64-BMI1NOTBM-NEXT:    popq %rbx
-; X64-BMI1NOTBM-NEXT:    popq %r14
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bzhi64_c4_commutative:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    pushq %r14
-; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    pushq %rax
-; X64-BMI1BMI2-NEXT:    movq %rsi, %rbx
-; X64-BMI1BMI2-NEXT:    movq %rdi, %r14
-; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI1BMI2-NEXT:    negb %al
-; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
-; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
-; X64-BMI1BMI2-NEXT:    callq use64@PLT
-; X64-BMI1BMI2-NEXT:    bzhiq %rbx, %r14, %rax
-; X64-BMI1BMI2-NEXT:    addq $8, %rsp
-; X64-BMI1BMI2-NEXT:    popq %rbx
-; X64-BMI1BMI2-NEXT:    popq %r14
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_c4_commutative:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    pushq %r14
+; X64-BMI1-NEXT:    pushq %rbx
+; X64-BMI1-NEXT:    pushq %rax
+; X64-BMI1-NEXT:    movq %rsi, %rcx
+; X64-BMI1-NEXT:    movq %rdi, %r14
+; X64-BMI1-NEXT:    negb %cl
+; X64-BMI1-NEXT:    movq $-1, %rbx
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1-NEXT:    shrq %cl, %rbx
+; X64-BMI1-NEXT:    movq %rbx, %rdi
+; X64-BMI1-NEXT:    callq use64@PLT
+; X64-BMI1-NEXT:    andq %r14, %rbx
+; X64-BMI1-NEXT:    movq %rbx, %rax
+; X64-BMI1-NEXT:    addq $8, %rsp
+; X64-BMI1-NEXT:    popq %rbx
+; X64-BMI1-NEXT:    popq %r14
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bzhi64_c4_commutative:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    pushq %r14
+; X64-BMI2-NEXT:    pushq %rbx
+; X64-BMI2-NEXT:    pushq %rax
+; X64-BMI2-NEXT:    movq %rsi, %rbx
+; X64-BMI2-NEXT:    movq %rdi, %r14
+; X64-BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI2-NEXT:    negb %al
+; X64-BMI2-NEXT:    movq $-1, %rcx
+; X64-BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI2-NEXT:    callq use64@PLT
+; X64-BMI2-NEXT:    bzhiq %rbx, %r14, %rax
+; X64-BMI2-NEXT:    addq $8, %rsp
+; X64-BMI2-NEXT:    popq %rbx
+; X64-BMI2-NEXT:    popq %r14
+; X64-BMI2-NEXT:    retq
   %numhighbits = sub i64 64, %numlowbits
   %mask = lshr i64 -1, %numhighbits
   call void @use64(i64 %mask)
@@ -3400,32 +3400,32 @@ define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_32_c0:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB39_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB39_2:
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_32_c0:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl $-1, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB39_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB39_2:
-; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_32_c0:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl $-1, %eax
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB39_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl $-1, %eax
+; X86-BMI1-NEXT:  .LBB39_2:
+; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_32_c0:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb $64, %cl
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    movl $-1, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB39_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
+; X86-BMI2-NEXT:  .LBB39_2:
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_32_c0:
 ; X64-NOBMI:       # %bb.0:
@@ -3438,16 +3438,16 @@ define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_32_c0:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_32_c0:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi64_32_c0:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi64_32_c0:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
   %numhighbits = sub i64 64, %numlowbits
   %mask = lshr i64 -1, %numhighbits
   %masked = and i64 %mask, %val
@@ -3467,18 +3467,18 @@ define i32 @bzhi64_32_c1(i64 %val, i32 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_32_c1:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_32_c1:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI1BMI2-LABEL: bzhi64_32_c1:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI2-LABEL: bzhi64_32_c1:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_32_c1:
 ; X64-NOBMI:       # %bb.0:
@@ -3491,16 +3491,16 @@ define i32 @bzhi64_32_c1(i64 %val, i32 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_32_c1:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_32_c1:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi64_32_c1:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi64_32_c1:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
   %truncval = trunc i64 %val to i32
   %numhighbits = sub i32 32, %numlowbits
   %mask = lshr i32 -1, %numhighbits
@@ -3521,18 +3521,18 @@ define i32 @bzhi64_32_c2(i64 %val, i32 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_32_c2:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_32_c2:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI1BMI2-LABEL: bzhi64_32_c2:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI2-LABEL: bzhi64_32_c2:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_32_c2:
 ; X64-NOBMI:       # %bb.0:
@@ -3545,16 +3545,16 @@ define i32 @bzhi64_32_c2(i64 %val, i32 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_32_c2:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_32_c2:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi64_32_c2:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi64_32_c2:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
   %numhighbits = sub i32 32, %numlowbits
   %mask = lshr i32 -1, %numhighbits
   %zextmask = zext i32 %mask to i64
@@ -3592,25 +3592,25 @@ define i32 @bzhi64_32_c3(i64 %val, i64 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_32_c3:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
-; X64-BMI1NOTBM-NEXT:    negb %cl
-; X64-BMI1NOTBM-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
-; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %rax
-; X64-BMI1NOTBM-NEXT:    andl %edi, %eax
-; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bzhi64_32_c3:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    negb %sil
-; X64-BMI1BMI2-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rax, %rax
-; X64-BMI1BMI2-NEXT:    andl %edi, %eax
-; X64-BMI1BMI2-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_32_c3:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    movq %rsi, %rcx
+; X64-BMI1-NEXT:    negb %cl
+; X64-BMI1-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-BMI1-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1-NEXT:    shrq %cl, %rax
+; X64-BMI1-NEXT:    andl %edi, %eax
+; X64-BMI1-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bzhi64_32_c3:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    negb %sil
+; X64-BMI2-NEXT:    movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-BMI2-NEXT:    shrxq %rsi, %rax, %rax
+; X64-BMI2-NEXT:    andl %edi, %eax
+; X64-BMI2-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI2-NEXT:    retq
   %numhighbits = sub i64 64, %numlowbits
   %mask = lshr i64 4294967295, %numhighbits
   %masked = and i64 %mask, %val
@@ -3633,18 +3633,18 @@ define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi32_d0:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
+; X86-BMI1-LABEL: bzhi32_d0:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI1BMI2-LABEL: bzhi32_d0:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI2-LABEL: bzhi32_d0:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_d0:
 ; X64-NOBMI:       # %bb.0:
@@ -3656,16 +3656,16 @@ define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    shrl %cl, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi32_d0:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi32_d0:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi32_d0:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi32_d0:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
   %numhighbits = sub i32 32, %numlowbits
   %highbitscleared = shl i32 %val, %numhighbits
   %masked = lshr i32 %highbitscleared, %numhighbits
@@ -3683,18 +3683,18 @@ define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi32_d1_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
+; X86-BMI1-LABEL: bzhi32_d1_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI1BMI2-LABEL: bzhi32_d1_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI2-LABEL: bzhi32_d1_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_d1_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -3706,16 +3706,16 @@ define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    shrl %cl, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi32_d1_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi32_d1_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi32_d1_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi32_d1_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
   %numhighbits = sub i8 32, %numlowbits
   %sh_prom = zext i8 %numhighbits to i32
   %highbitscleared = shl i32 %val, %sh_prom
@@ -3735,20 +3735,20 @@ define i32 @bzhi32_d2_load(i32* %w, i32 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi32_d2_load:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    shll $8, %ecx
-; X86-BMI1NOTBM-NEXT:    bextrl %ecx, (%eax), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi32_d2_load:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi32_d2_load:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    shll $8, %ecx
+; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi32_d2_load:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_d2_load:
 ; X64-NOBMI:       # %bb.0:
@@ -3760,16 +3760,16 @@ define i32 @bzhi32_d2_load(i32* %w, i32 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    shrl %cl, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi32_d2_load:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, (%rdi), %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi32_d2_load:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, (%rdi), %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi32_d2_load:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi32_d2_load:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
+; X64-BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %numhighbits = sub i32 32, %numlowbits
   %highbitscleared = shl i32 %val, %numhighbits
@@ -3789,20 +3789,20 @@ define i32 @bzhi32_d3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi32_d3_load_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    shll $8, %ecx
-; X86-BMI1NOTBM-NEXT:    bextrl %ecx, (%eax), %eax
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi32_d3_load_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi32_d3_load_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    shll $8, %ecx
+; X86-BMI1-NEXT:    bextrl %ecx, (%eax), %eax
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi32_d3_load_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    bzhil %ecx, (%eax), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_d3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -3814,16 +3814,16 @@ define i32 @bzhi32_d3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    shrl %cl, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi32_d3_load_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, (%rdi), %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi32_d3_load_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, (%rdi), %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi32_d3_load_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi32_d3_load_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
+; X64-BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %numhighbits = sub i8 32, %numlowbits
   %sh_prom = zext i8 %numhighbits to i32
@@ -3874,76 +3874,76 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %ebx
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_d0:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl %edx, %esi
-; X86-BMI1NOTBM-NEXT:    shll %cl, %esi
-; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:    jne .LBB47_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB47_2:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB47_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB47_4:
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %ebx
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB47_6
-; X86-BMI1NOTBM-NEXT:  # %bb.5:
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB47_6:
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_d0:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %esi
-; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB47_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB47_2:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB47_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB47_4:
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB47_6
-; X86-BMI1BMI2-NEXT:  # %bb.5:
-; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB47_6:
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_d0:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl %edx, %esi
+; X86-BMI1-NEXT:    shll %cl, %esi
+; X86-BMI1-NEXT:    shldl %cl, %edx, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    movl %esi, %edi
+; X86-BMI1-NEXT:    jne .LBB47_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %edi
+; X86-BMI1-NEXT:  .LBB47_2:
+; X86-BMI1-NEXT:    movl %edi, %eax
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    xorl %ebx, %ebx
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    movl $0, %edx
+; X86-BMI1-NEXT:    jne .LBB47_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %esi, %ebx
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:  .LBB47_4:
+; X86-BMI1-NEXT:    shrdl %cl, %edi, %ebx
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB47_6
+; X86-BMI1-NEXT:  # %bb.5:
+; X86-BMI1-NEXT:    movl %ebx, %eax
+; X86-BMI1-NEXT:  .LBB47_6:
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_d0:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movb $64, %cl
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    shldl %cl, %eax, %esi
+; X86-BMI2-NEXT:    shlxl %ecx, %eax, %edi
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB47_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edi, %esi
+; X86-BMI2-NEXT:    movl $0, %edi
+; X86-BMI2-NEXT:  .LBB47_2:
+; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
+; X86-BMI2-NEXT:    jne .LBB47_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:  .LBB47_4:
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    jne .LBB47_6
+; X86-BMI2-NEXT:  # %bb.5:
+; X86-BMI2-NEXT:    movl %edi, %eax
+; X86-BMI2-NEXT:  .LBB47_6:
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_d0:
 ; X64-NOBMI:       # %bb.0:
@@ -3955,16 +3955,16 @@ define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    shrq %cl, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_d0:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrq %rsi, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_d0:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi64_d0:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi64_d0:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    retq
   %numhighbits = sub i64 64, %numlowbits
   %highbitscleared = shl i64 %val, %numhighbits
   %masked = lshr i64 %highbitscleared, %numhighbits
@@ -4011,76 +4011,76 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %ebx
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_d1_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl %edx, %esi
-; X86-BMI1NOTBM-NEXT:    shll %cl, %esi
-; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:    jne .LBB48_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB48_2:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB48_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB48_4:
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %ebx
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB48_6
-; X86-BMI1NOTBM-NEXT:  # %bb.5:
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB48_6:
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_d1_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %esi
-; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB48_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB48_2:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB48_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB48_4:
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB48_6
-; X86-BMI1BMI2-NEXT:  # %bb.5:
-; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB48_6:
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_d1_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl %edx, %esi
+; X86-BMI1-NEXT:    shll %cl, %esi
+; X86-BMI1-NEXT:    shldl %cl, %edx, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    movl %esi, %edi
+; X86-BMI1-NEXT:    jne .LBB48_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %edi
+; X86-BMI1-NEXT:  .LBB48_2:
+; X86-BMI1-NEXT:    movl %edi, %eax
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    xorl %ebx, %ebx
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    movl $0, %edx
+; X86-BMI1-NEXT:    jne .LBB48_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %esi, %ebx
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:  .LBB48_4:
+; X86-BMI1-NEXT:    shrdl %cl, %edi, %ebx
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB48_6
+; X86-BMI1-NEXT:  # %bb.5:
+; X86-BMI1-NEXT:    movl %ebx, %eax
+; X86-BMI1-NEXT:  .LBB48_6:
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_d1_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    movb $64, %cl
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    shldl %cl, %eax, %esi
+; X86-BMI2-NEXT:    shlxl %ecx, %eax, %edi
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB48_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edi, %esi
+; X86-BMI2-NEXT:    movl $0, %edi
+; X86-BMI2-NEXT:  .LBB48_2:
+; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
+; X86-BMI2-NEXT:    jne .LBB48_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:  .LBB48_4:
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    jne .LBB48_6
+; X86-BMI2-NEXT:  # %bb.5:
+; X86-BMI2-NEXT:    movl %edi, %eax
+; X86-BMI2-NEXT:  .LBB48_6:
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_d1_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -4092,18 +4092,18 @@ define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    shrq %cl, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_d1_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrq %rsi, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bzhi64_d1_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_d1_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bzhi64_d1_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    retq
   %numhighbits = sub i8 64, %numlowbits
   %sh_prom = zext i8 %numhighbits to i64
   %highbitscleared = shl i64 %val, %sh_prom
@@ -4152,78 +4152,78 @@ define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %ebx
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_d2_load:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl (%eax), %edx
-; X86-BMI1NOTBM-NEXT:    movl 4(%eax), %eax
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl %edx, %esi
-; X86-BMI1NOTBM-NEXT:    shll %cl, %esi
-; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:    jne .LBB49_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB49_2:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB49_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB49_4:
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %ebx
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB49_6
-; X86-BMI1NOTBM-NEXT:  # %bb.5:
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB49_6:
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_d2_load:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movl (%eax), %edx
-; X86-BMI1BMI2-NEXT:    movl 4(%eax), %esi
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shldl %cl, %edx, %esi
-; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edx, %edi
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB49_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB49_2:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB49_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB49_4:
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB49_6
-; X86-BMI1BMI2-NEXT:  # %bb.5:
-; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB49_6:
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_d2_load:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl (%eax), %edx
+; X86-BMI1-NEXT:    movl 4(%eax), %eax
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl %edx, %esi
+; X86-BMI1-NEXT:    shll %cl, %esi
+; X86-BMI1-NEXT:    shldl %cl, %edx, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    movl %esi, %edi
+; X86-BMI1-NEXT:    jne .LBB49_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %edi
+; X86-BMI1-NEXT:  .LBB49_2:
+; X86-BMI1-NEXT:    movl %edi, %eax
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    xorl %ebx, %ebx
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    movl $0, %edx
+; X86-BMI1-NEXT:    jne .LBB49_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %esi, %ebx
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:  .LBB49_4:
+; X86-BMI1-NEXT:    shrdl %cl, %edi, %ebx
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB49_6
+; X86-BMI1-NEXT:  # %bb.5:
+; X86-BMI1-NEXT:    movl %ebx, %eax
+; X86-BMI1-NEXT:  .LBB49_6:
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_d2_load:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl (%eax), %edx
+; X86-BMI2-NEXT:    movl 4(%eax), %esi
+; X86-BMI2-NEXT:    movb $64, %cl
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    shldl %cl, %edx, %esi
+; X86-BMI2-NEXT:    shlxl %ecx, %edx, %edi
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB49_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edi, %esi
+; X86-BMI2-NEXT:    movl $0, %edi
+; X86-BMI2-NEXT:  .LBB49_2:
+; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
+; X86-BMI2-NEXT:    jne .LBB49_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:  .LBB49_4:
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    jne .LBB49_6
+; X86-BMI2-NEXT:  # %bb.5:
+; X86-BMI2-NEXT:    movl %edi, %eax
+; X86-BMI2-NEXT:  .LBB49_6:
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_d2_load:
 ; X64-NOBMI:       # %bb.0:
@@ -4235,16 +4235,16 @@ define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    shrq %cl, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_d2_load:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrq %rsi, (%rdi), %rax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_d2_load:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi64_d2_load:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi64_d2_load:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
+; X64-BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %numhighbits = sub i64 64, %numlowbits
   %highbitscleared = shl i64 %val, %numhighbits
@@ -4293,78 +4293,78 @@ define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %ebx
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_d3_load_indexzext:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %ebx
-; X86-BMI1NOTBM-NEXT:    pushl %edi
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl (%eax), %edx
-; X86-BMI1NOTBM-NEXT:    movl 4(%eax), %eax
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl %edx, %esi
-; X86-BMI1NOTBM-NEXT:    shll %cl, %esi
-; X86-BMI1NOTBM-NEXT:    shldl %cl, %edx, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edi
-; X86-BMI1NOTBM-NEXT:    jne .LBB50_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
-; X86-BMI1NOTBM-NEXT:  .LBB50_2:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %eax
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    movl $0, %edx
-; X86-BMI1NOTBM-NEXT:    jne .LBB50_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
-; X86-BMI1NOTBM-NEXT:    movl %eax, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB50_4:
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edi, %ebx
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB50_6
-; X86-BMI1NOTBM-NEXT:  # %bb.5:
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB50_6:
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    popl %edi
-; X86-BMI1NOTBM-NEXT:    popl %ebx
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_d3_load_indexzext:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    pushl %edi
-; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movl (%eax), %edx
-; X86-BMI1BMI2-NEXT:    movl 4(%eax), %esi
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shldl %cl, %edx, %esi
-; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edx, %edi
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB50_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:    movl $0, %edi
-; X86-BMI1BMI2-NEXT:  .LBB50_2:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
-; X86-BMI1BMI2-NEXT:    jne .LBB50_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:  .LBB50_4:
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    jne .LBB50_6
-; X86-BMI1BMI2-NEXT:  # %bb.5:
-; X86-BMI1BMI2-NEXT:    movl %edi, %eax
-; X86-BMI1BMI2-NEXT:  .LBB50_6:
-; X86-BMI1BMI2-NEXT:    popl %esi
-; X86-BMI1BMI2-NEXT:    popl %edi
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_d3_load_indexzext:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %ebx
+; X86-BMI1-NEXT:    pushl %edi
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movl (%eax), %edx
+; X86-BMI1-NEXT:    movl 4(%eax), %eax
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl %edx, %esi
+; X86-BMI1-NEXT:    shll %cl, %esi
+; X86-BMI1-NEXT:    shldl %cl, %edx, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    movl %esi, %edi
+; X86-BMI1-NEXT:    jne .LBB50_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %eax, %edi
+; X86-BMI1-NEXT:  .LBB50_2:
+; X86-BMI1-NEXT:    movl %edi, %eax
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    xorl %ebx, %ebx
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    movl $0, %edx
+; X86-BMI1-NEXT:    jne .LBB50_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %esi, %ebx
+; X86-BMI1-NEXT:    movl %eax, %edx
+; X86-BMI1-NEXT:  .LBB50_4:
+; X86-BMI1-NEXT:    shrdl %cl, %edi, %ebx
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB50_6
+; X86-BMI1-NEXT:  # %bb.5:
+; X86-BMI1-NEXT:    movl %ebx, %eax
+; X86-BMI1-NEXT:  .LBB50_6:
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    popl %edi
+; X86-BMI1-NEXT:    popl %ebx
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_d3_load_indexzext:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl (%eax), %edx
+; X86-BMI2-NEXT:    movl 4(%eax), %esi
+; X86-BMI2-NEXT:    movb $64, %cl
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    shldl %cl, %edx, %esi
+; X86-BMI2-NEXT:    shlxl %ecx, %edx, %edi
+; X86-BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB50_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %edi, %esi
+; X86-BMI2-NEXT:    movl $0, %edi
+; X86-BMI2-NEXT:  .LBB50_2:
+; X86-BMI2-NEXT:    shrxl %ecx, %esi, %eax
+; X86-BMI2-NEXT:    jne .LBB50_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:  .LBB50_4:
+; X86-BMI2-NEXT:    shrdl %cl, %esi, %edi
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    jne .LBB50_6
+; X86-BMI2-NEXT:  # %bb.5:
+; X86-BMI2-NEXT:    movl %edi, %eax
+; X86-BMI2-NEXT:  .LBB50_6:
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_d3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
@@ -4376,18 +4376,18 @@ define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    shrq %cl, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_d3_load_indexzext:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrq %rsi, (%rdi), %rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bzhi64_d3_load_indexzext:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_d3_load_indexzext:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrq %rsi, (%rdi), %rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bzhi64_d3_load_indexzext:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
+; X64-BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %numhighbits = sub i8 64, %numlowbits
   %sh_prom = zext i8 %numhighbits to i64
@@ -4426,53 +4426,53 @@ define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_32_d0:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movb $64, %cl
-; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:    shll %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shldl %cl, %esi, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    je .LBB51_2
-; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
-; X86-BMI1NOTBM-NEXT:  .LBB51_2:
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %eax
-; X86-BMI1NOTBM-NEXT:    testb $32, %cl
-; X86-BMI1NOTBM-NEXT:    jne .LBB51_4
-; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:  .LBB51_4:
-; X86-BMI1NOTBM-NEXT:    popl %esi
-; X86-BMI1NOTBM-NEXT:    retl
-;
-; X86-BMI1BMI2-LABEL: bzhi64_32_d0:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    movb $64, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
-; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB51_2
-; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %eax, %edx
-; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI1BMI2-NEXT:  .LBB51_2:
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
-; X86-BMI1BMI2-NEXT:    testb $32, %cl
-; X86-BMI1BMI2-NEXT:    je .LBB51_4
-; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %eax
-; X86-BMI1BMI2-NEXT:  .LBB51_4:
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_32_d0:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    pushl %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    movb $64, %cl
+; X86-BMI1-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1-NEXT:    movl %esi, %edx
+; X86-BMI1-NEXT:    shll %cl, %edx
+; X86-BMI1-NEXT:    shldl %cl, %esi, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    je .LBB51_2
+; X86-BMI1-NEXT:  # %bb.1:
+; X86-BMI1-NEXT:    movl %edx, %eax
+; X86-BMI1-NEXT:    xorl %edx, %edx
+; X86-BMI1-NEXT:  .LBB51_2:
+; X86-BMI1-NEXT:    shrdl %cl, %eax, %edx
+; X86-BMI1-NEXT:    shrl %cl, %eax
+; X86-BMI1-NEXT:    testb $32, %cl
+; X86-BMI1-NEXT:    jne .LBB51_4
+; X86-BMI1-NEXT:  # %bb.3:
+; X86-BMI1-NEXT:    movl %edx, %eax
+; X86-BMI1-NEXT:  .LBB51_4:
+; X86-BMI1-NEXT:    popl %esi
+; X86-BMI1-NEXT:    retl
+;
+; X86-BMI2-LABEL: bzhi64_32_d0:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    movb $64, %cl
+; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB51_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB51_2:
+; X86-BMI2-NEXT:    shrdl %cl, %edx, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB51_4
+; X86-BMI2-NEXT:  # %bb.3:
+; X86-BMI2-NEXT:    shrxl %ecx, %edx, %eax
+; X86-BMI2-NEXT:  .LBB51_4:
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_32_d0:
 ; X64-NOBMI:       # %bb.0:
@@ -4485,18 +4485,18 @@ define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_32_d0:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrq %rsi, %rdi, %rax
-; X64-BMI1NOTBM-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-BMI1NOTBM-NEXT:    retq
-;
-; X64-BMI1BMI2-LABEL: bzhi64_32_d0:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_32_d0:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrq %rsi, %rdi, %rax
+; X64-BMI1-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI1-NEXT:    retq
+;
+; X64-BMI2-LABEL: bzhi64_32_d0:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-BMI2-NEXT:    retq
   %numhighbits = sub i64 64, %numlowbits
   %highbitscleared = shl i64 %val, %numhighbits
   %masked = lshr i64 %highbitscleared, %numhighbits
@@ -4516,18 +4516,18 @@ define i32 @bzhi64_32_d1(i64 %val, i32 %numlowbits) nounwind {
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    retl
 ;
-; X86-BMI1NOTBM-LABEL: bzhi64_32_d1:
-; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    retl
+; X86-BMI1-LABEL: bzhi64_32_d1:
+; X86-BMI1:       # %bb.0:
+; X86-BMI1-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT:    shll $8, %eax
+; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT:    retl
 ;
-; X86-BMI1BMI2-LABEL: bzhi64_32_d1:
-; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    retl
+; X86-BMI2-LABEL: bzhi64_32_d1:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_32_d1:
 ; X64-NOBMI:       # %bb.0:
@@ -4540,16 +4540,16 @@ define i32 @bzhi64_32_d1(i64 %val, i32 %numlowbits) nounwind {
 ; X64-NOBMI-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NOBMI-NEXT:    retq
 ;
-; X64-BMI1NOTBM-LABEL: bzhi64_32_d1:
-; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
-; X64-BMI1NOTBM-NEXT:    retq
+; X64-BMI1-LABEL: bzhi64_32_d1:
+; X64-BMI1:       # %bb.0:
+; X64-BMI1-NEXT:    shll $8, %esi
+; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1-NEXT:    retq
 ;
-; X64-BMI1BMI2-LABEL: bzhi64_32_d1:
-; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    retq
+; X64-BMI2-LABEL: bzhi64_32_d1:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI2-NEXT:    retq
   %truncval = trunc i64 %val to i32
   %numhighbits = sub i32 32, %numlowbits
   %highbitscleared = shl i32 %truncval, %numhighbits
@@ -4692,11 +4692,16 @@ define i64 @bzhi64_constant_mask64(i64 %val) nounwind {
 ; X64-BMI1TBM-NEXT:    bextrq $15872, %rdi, %rax # imm = 0x3E00
 ; X64-BMI1TBM-NEXT:    retq
 ;
-; X64-BMI1NOTBMBMI2-LABEL: bzhi64_constant_mask64:
-; X64-BMI1NOTBMBMI2:       # %bb.0:
-; X64-BMI1NOTBMBMI2-NEXT:    movb $62, %al
-; X64-BMI1NOTBMBMI2-NEXT:    bzhiq %rax, %rdi, %rax
-; X64-BMI1NOTBMBMI2-NEXT:    retq
+; X64-BMI2TBM-LABEL: bzhi64_constant_mask64:
+; X64-BMI2TBM:       # %bb.0:
+; X64-BMI2TBM-NEXT:    bextrq $15872, %rdi, %rax # imm = 0x3E00
+; X64-BMI2TBM-NEXT:    retq
+;
+; X64-BMI2NOTBM-LABEL: bzhi64_constant_mask64:
+; X64-BMI2NOTBM:       # %bb.0:
+; X64-BMI2NOTBM-NEXT:    movb $62, %al
+; X64-BMI2NOTBM-NEXT:    bzhiq %rax, %rdi, %rax
+; X64-BMI2NOTBM-NEXT:    retq
   %masked = and i64 %val, 4611686018427387903
   ret i64 %masked
 }
@@ -4727,11 +4732,16 @@ define i64 @bzhi64_constant_mask64_load(i64* %val) nounwind {
 ; X64-BMI1TBM-NEXT:    bextrq $15872, (%rdi), %rax # imm = 0x3E00
 ; X64-BMI1TBM-NEXT:    retq
 ;
-; X64-BMI1NOTBMBMI2-LABEL: bzhi64_constant_mask64_load:
-; X64-BMI1NOTBMBMI2:       # %bb.0:
-; X64-BMI1NOTBMBMI2-NEXT:    movb $62, %al
-; X64-BMI1NOTBMBMI2-NEXT:    bzhiq %rax, (%rdi), %rax
-; X64-BMI1NOTBMBMI2-NEXT:    retq
+; X64-BMI2TBM-LABEL: bzhi64_constant_mask64_load:
+; X64-BMI2TBM:       # %bb.0:
+; X64-BMI2TBM-NEXT:    bextrq $15872, (%rdi), %rax # imm = 0x3E00
+; X64-BMI2TBM-NEXT:    retq
+;
+; X64-BMI2NOTBM-LABEL: bzhi64_constant_mask64_load:
+; X64-BMI2NOTBM:       # %bb.0:
+; X64-BMI2NOTBM-NEXT:    movb $62, %al
+; X64-BMI2NOTBM-NEXT:    bzhiq %rax, (%rdi), %rax
+; X64-BMI2NOTBM-NEXT:    retq
   %val1 = load i64, i64* %val
   %masked = and i64 %val1, 4611686018427387903
   ret i64 %masked


        


More information about the llvm-branch-commits mailing list