[llvm] r345583 - [X86] Add extra-uses on the mask of pattern c of extract-{low,}bits.ll tests

Roman Lebedev via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 30 04:12:29 PDT 2018


Author: lebedevri
Date: Tue Oct 30 04:12:29 2018
New Revision: 345583

URL: http://llvm.org/viewvc/llvm-project?rev=345583&view=rev
Log:
[X86] Add extra-uses on the mask of pattern c of extract-{low,}bits.ll tests

Summary:
Because of D48768, that pattern (pattern c) is always unfolded into pattern d,
so we had no test coverage for it.

Reviewers: RKSimon, craig.topper

Reviewed By: craig.topper

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D53574

Modified:
    llvm/trunk/test/CodeGen/X86/extract-bits.ll
    llvm/trunk/test/CodeGen/X86/extract-lowbits.ll

Modified: llvm/trunk/test/CodeGen/X86/extract-bits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/extract-bits.ll?rev=345583&r1=345582&r2=345583&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/extract-bits.ll (original)
+++ llvm/trunk/test/CodeGen/X86/extract-bits.ll Tue Oct 30 04:12:29 2018
@@ -2801,63 +2801,133 @@ define i64 @bextr64_b5_skipextrauses(i64
 define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bextr32_c0:
 ; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %edi
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    pushl %eax
 ; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    movl $-1, %esi
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    shrl %cl, %esi
+; X86-NOBMI-NEXT:    movl %esi, (%esp)
+; X86-NOBMI-NEXT:    calll use32
+; X86-NOBMI-NEXT:    andl %edi, %esi
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    addl $4, %esp
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
 ; X86-BMI1NOTBM-LABEL: bextr32_c0:
 ; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    pushl %edi
+; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    pushl %eax
 ; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, %edx, %eax
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
+; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
+; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
+; X86-BMI1NOTBM-NEXT:    movl %esi, (%esp)
+; X86-BMI1NOTBM-NEXT:    calll use32
+; X86-BMI1NOTBM-NEXT:    andl %edi, %esi
+; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    addl $4, %esp
+; X86-BMI1NOTBM-NEXT:    popl %esi
+; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1BMI2-LABEL: bextr32_c0:
 ; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %ecx, %eax
+; X86-BMI1BMI2-NEXT:    pushl %edi
+; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    pushl %eax
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %edi
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    negl %eax
+; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
+; X86-BMI1BMI2-NEXT:    calll use32
+; X86-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X86-BMI1BMI2-NEXT:    addl $4, %esp
+; X86-BMI1BMI2-NEXT:    popl %esi
+; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_c0:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %rbp
+; X64-NOBMI-NEXT:    pushq %rbx
+; X64-NOBMI-NEXT:    pushq %rax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    movl %edi, %ebx
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    shrl %cl, %ebx
 ; X64-NOBMI-NEXT:    negl %edx
+; X64-NOBMI-NEXT:    movl $-1, %ebp
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
-; X64-NOBMI-NEXT:    shll %cl, %eax
-; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    shrl %cl, %ebp
+; X64-NOBMI-NEXT:    movl %ebp, %edi
+; X64-NOBMI-NEXT:    callq use32
+; X64-NOBMI-NEXT:    andl %ebx, %ebp
+; X64-NOBMI-NEXT:    movl %ebp, %eax
+; X64-NOBMI-NEXT:    addq $8, %rsp
+; X64-NOBMI-NEXT:    popq %rbx
+; X64-NOBMI-NEXT:    popq %rbp
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1NOTBM-LABEL: bextr32_c0:
 ; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    pushq %rbp
+; X64-BMI1NOTBM-NEXT:    pushq %rbx
+; X64-BMI1NOTBM-NEXT:    pushq %rax
 ; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
+; X64-BMI1NOTBM-NEXT:    movl %edi, %ebx
 ; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    bextrl %edx, %edi, %eax
+; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebx
+; X64-BMI1NOTBM-NEXT:    negl %edx
+; X64-BMI1NOTBM-NEXT:    movl $-1, %ebp
+; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
+; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebp
+; X64-BMI1NOTBM-NEXT:    movl %ebp, %edi
+; X64-BMI1NOTBM-NEXT:    callq use32
+; X64-BMI1NOTBM-NEXT:    andl %ebx, %ebp
+; X64-BMI1NOTBM-NEXT:    movl %ebp, %eax
+; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
+; X64-BMI1NOTBM-NEXT:    popq %rbx
+; X64-BMI1NOTBM-NEXT:    popq %rbp
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bextr32_c0:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI1BMI2-NEXT:    pushq %rbp
+; X64-BMI1BMI2-NEXT:    pushq %rbx
+; X64-BMI1BMI2-NEXT:    pushq %rax
+; X64-BMI1BMI2-NEXT:    movl %edx, %ebx
+; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %ebp
+; X64-BMI1BMI2-NEXT:    movl %edx, %eax
+; X64-BMI1BMI2-NEXT:    negl %eax
+; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
+; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %edi
+; X64-BMI1BMI2-NEXT:    callq use32
+; X64-BMI1BMI2-NEXT:    bzhil %ebx, %ebp, %eax
+; X64-BMI1BMI2-NEXT:    addq $8, %rsp
+; X64-BMI1BMI2-NEXT:    popq %rbx
+; X64-BMI1BMI2-NEXT:    popq %rbp
 ; X64-BMI1BMI2-NEXT:    retq
   %shifted = lshr i32 %val, %numskipbits
   %numhighbits = sub i32 32, %numlowbits
   %mask = lshr i32 -1, %numhighbits
+  call void @use32(i32 %mask)
   %masked = and i32 %mask, %shifted
   ret i32 %masked
 }
@@ -2865,65 +2935,135 @@ define i32 @bextr32_c0(i32 %val, i32 %nu
 define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bextr32_c1_indexzext:
 ; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %edi
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    pushl %eax
 ; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    movl $-1, %esi
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    shrl %cl, %esi
+; X86-NOBMI-NEXT:    movl %esi, (%esp)
+; X86-NOBMI-NEXT:    calll use32
+; X86-NOBMI-NEXT:    andl %edi, %esi
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    addl $4, %esp
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
 ; X86-BMI1NOTBM-LABEL: bextr32_c1_indexzext:
 ; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1NOTBM-NEXT:    pushl %edi
+; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    pushl %eax
 ; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, %edx, %eax
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
+; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
+; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
+; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
+; X86-BMI1NOTBM-NEXT:    movl %esi, (%esp)
+; X86-BMI1NOTBM-NEXT:    calll use32
+; X86-BMI1NOTBM-NEXT:    andl %edi, %esi
+; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    addl $4, %esp
+; X86-BMI1NOTBM-NEXT:    popl %esi
+; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1BMI2-LABEL: bextr32_c1_indexzext:
 ; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    pushl %ebx
+; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    pushl %eax
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
 ; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %ecx, %eax
+; X86-BMI1BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
+; X86-BMI1BMI2-NEXT:    negb %al
+; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
+; X86-BMI1BMI2-NEXT:    calll use32
+; X86-BMI1BMI2-NEXT:    bzhil %ebx, %esi, %eax
+; X86-BMI1BMI2-NEXT:    addl $4, %esp
+; X86-BMI1BMI2-NEXT:    popl %esi
+; X86-BMI1BMI2-NEXT:    popl %ebx
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_c1_indexzext:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %rbp
+; X64-NOBMI-NEXT:    pushq %rbx
+; X64-NOBMI-NEXT:    pushq %rax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    movl %edi, %ebx
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    shrl %cl, %ebx
 ; X64-NOBMI-NEXT:    negb %dl
+; X64-NOBMI-NEXT:    movl $-1, %ebp
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
-; X64-NOBMI-NEXT:    shll %cl, %eax
-; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    shrl %cl, %ebp
+; X64-NOBMI-NEXT:    movl %ebp, %edi
+; X64-NOBMI-NEXT:    callq use32
+; X64-NOBMI-NEXT:    andl %ebx, %ebp
+; X64-NOBMI-NEXT:    movl %ebp, %eax
+; X64-NOBMI-NEXT:    addq $8, %rsp
+; X64-NOBMI-NEXT:    popq %rbx
+; X64-NOBMI-NEXT:    popq %rbp
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1NOTBM-LABEL: bextr32_c1_indexzext:
 ; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    pushq %rbp
+; X64-BMI1NOTBM-NEXT:    pushq %rbx
+; X64-BMI1NOTBM-NEXT:    pushq %rax
 ; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
+; X64-BMI1NOTBM-NEXT:    movl %edi, %ebx
 ; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    bextrl %edx, %edi, %eax
+; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebx
+; X64-BMI1NOTBM-NEXT:    negb %dl
+; X64-BMI1NOTBM-NEXT:    movl $-1, %ebp
+; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
+; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebp
+; X64-BMI1NOTBM-NEXT:    movl %ebp, %edi
+; X64-BMI1NOTBM-NEXT:    callq use32
+; X64-BMI1NOTBM-NEXT:    andl %ebx, %ebp
+; X64-BMI1NOTBM-NEXT:    movl %ebp, %eax
+; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
+; X64-BMI1NOTBM-NEXT:    popq %rbx
+; X64-BMI1NOTBM-NEXT:    popq %rbp
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bextr32_c1_indexzext:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI1BMI2-NEXT:    pushq %rbp
+; X64-BMI1BMI2-NEXT:    pushq %rbx
+; X64-BMI1BMI2-NEXT:    pushq %rax
+; X64-BMI1BMI2-NEXT:    movl %edx, %ebx
+; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %ebp
+; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI1BMI2-NEXT:    negb %al
+; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
+; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %edi
+; X64-BMI1BMI2-NEXT:    callq use32
+; X64-BMI1BMI2-NEXT:    bzhil %ebx, %ebp, %eax
+; X64-BMI1BMI2-NEXT:    addq $8, %rsp
+; X64-BMI1BMI2-NEXT:    popq %rbx
+; X64-BMI1BMI2-NEXT:    popq %rbp
 ; X64-BMI1BMI2-NEXT:    retq
   %skip = zext i8 %numskipbits to i32
   %shifted = lshr i32 %val, %skip
   %numhighbits = sub i8 32, %numlowbits
   %sh_prom = zext i8 %numhighbits to i32
   %mask = lshr i32 -1, %sh_prom
+  call void @use32(i32 %mask)
   %masked = and i32 %mask, %shifted
   ret i32 %masked
 }
@@ -2931,68 +3071,137 @@ define i32 @bextr32_c1_indexzext(i32 %va
 define i32 @bextr32_c2_load(i32* %w, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bextr32_c2_load:
 ; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %edi
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    pushl %eax
 ; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    movl (%eax), %eax
-; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    movl (%eax), %edi
+; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    movl $-1, %esi
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    shrl %cl, %esi
+; X86-NOBMI-NEXT:    movl %esi, (%esp)
+; X86-NOBMI-NEXT:    calll use32
+; X86-NOBMI-NEXT:    andl %edi, %esi
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    addl $4, %esp
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
 ; X86-BMI1NOTBM-LABEL: bextr32_c2_load:
 ; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    pushl %edi
+; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    pushl %eax
 ; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    movl (%edx), %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, %edx, %eax
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    movl (%eax), %edi
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
+; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
+; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
+; X86-BMI1NOTBM-NEXT:    movl %esi, (%esp)
+; X86-BMI1NOTBM-NEXT:    calll use32
+; X86-BMI1NOTBM-NEXT:    andl %edi, %esi
+; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    addl $4, %esp
+; X86-BMI1NOTBM-NEXT:    popl %esi
+; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1BMI2-LABEL: bextr32_c2_load:
 ; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    pushl %edi
+; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    pushl %eax
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
-; X86-BMI1BMI2-NEXT:    shrxl %edx, (%ecx), %ecx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %ecx, %eax
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, (%eax), %edi
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    negl %eax
+; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
+; X86-BMI1BMI2-NEXT:    calll use32
+; X86-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X86-BMI1BMI2-NEXT:    addl $4, %esp
+; X86-BMI1BMI2-NEXT:    popl %esi
+; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_c2_load:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %rbp
+; X64-NOBMI-NEXT:    pushq %rbx
+; X64-NOBMI-NEXT:    pushq %rax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    movl (%rdi), %eax
+; X64-NOBMI-NEXT:    movl (%rdi), %ebp
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    shrl %cl, %ebp
 ; X64-NOBMI-NEXT:    negl %edx
+; X64-NOBMI-NEXT:    movl $-1, %ebx
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
-; X64-NOBMI-NEXT:    shll %cl, %eax
-; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    shrl %cl, %ebx
+; X64-NOBMI-NEXT:    movl %ebx, %edi
+; X64-NOBMI-NEXT:    callq use32
+; X64-NOBMI-NEXT:    andl %ebp, %ebx
+; X64-NOBMI-NEXT:    movl %ebx, %eax
+; X64-NOBMI-NEXT:    addq $8, %rsp
+; X64-NOBMI-NEXT:    popq %rbx
+; X64-NOBMI-NEXT:    popq %rbp
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1NOTBM-LABEL: bextr32_c2_load:
 ; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    pushq %rbp
+; X64-BMI1NOTBM-NEXT:    pushq %rbx
+; X64-BMI1NOTBM-NEXT:    pushq %rax
 ; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
-; X64-BMI1NOTBM-NEXT:    movl (%rdi), %eax
+; X64-BMI1NOTBM-NEXT:    movl (%rdi), %ebp
 ; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-BMI1NOTBM-NEXT:    shrl %cl, %eax
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    bextrl %edx, %eax, %eax
+; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebp
+; X64-BMI1NOTBM-NEXT:    negl %edx
+; X64-BMI1NOTBM-NEXT:    movl $-1, %ebx
+; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
+; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebx
+; X64-BMI1NOTBM-NEXT:    movl %ebx, %edi
+; X64-BMI1NOTBM-NEXT:    callq use32
+; X64-BMI1NOTBM-NEXT:    andl %ebp, %ebx
+; X64-BMI1NOTBM-NEXT:    movl %ebx, %eax
+; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
+; X64-BMI1NOTBM-NEXT:    popq %rbx
+; X64-BMI1NOTBM-NEXT:    popq %rbp
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bextr32_c2_load:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxl %esi, (%rdi), %eax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI1BMI2-NEXT:    pushq %rbp
+; X64-BMI1BMI2-NEXT:    pushq %rbx
+; X64-BMI1BMI2-NEXT:    pushq %rax
+; X64-BMI1BMI2-NEXT:    movl %edx, %ebx
+; X64-BMI1BMI2-NEXT:    shrxl %esi, (%rdi), %ebp
+; X64-BMI1BMI2-NEXT:    movl %edx, %eax
+; X64-BMI1BMI2-NEXT:    negl %eax
+; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
+; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %edi
+; X64-BMI1BMI2-NEXT:    callq use32
+; X64-BMI1BMI2-NEXT:    bzhil %ebx, %ebp, %eax
+; X64-BMI1BMI2-NEXT:    addq $8, %rsp
+; X64-BMI1BMI2-NEXT:    popq %rbx
+; X64-BMI1BMI2-NEXT:    popq %rbp
 ; X64-BMI1BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %shifted = lshr i32 %val, %numskipbits
   %numhighbits = sub i32 32, %numlowbits
   %mask = lshr i32 -1, %numhighbits
+  call void @use32(i32 %mask)
   %masked = and i32 %mask, %shifted
   ret i32 %masked
 }
@@ -3000,63 +3209,131 @@ define i32 @bextr32_c2_load(i32* %w, i32
 define i32 @bextr32_c3_load_indexzext(i32* %w, i8 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bextr32_c3_load_indexzext:
 ; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %edi
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    pushl %eax
 ; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    movl (%eax), %eax
-; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    movl (%eax), %edi
+; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    movl $-1, %esi
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    shrl %cl, %esi
+; X86-NOBMI-NEXT:    movl %esi, (%esp)
+; X86-NOBMI-NEXT:    calll use32
+; X86-NOBMI-NEXT:    andl %edi, %esi
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    addl $4, %esp
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
 ; X86-BMI1NOTBM-LABEL: bextr32_c3_load_indexzext:
 ; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1NOTBM-NEXT:    pushl %edi
+; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    pushl %eax
 ; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    movl (%edx), %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, %edx, %eax
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    movl (%eax), %edi
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
+; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
+; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
+; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
+; X86-BMI1NOTBM-NEXT:    movl %esi, (%esp)
+; X86-BMI1NOTBM-NEXT:    calll use32
+; X86-BMI1NOTBM-NEXT:    andl %edi, %esi
+; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    addl $4, %esp
+; X86-BMI1NOTBM-NEXT:    popl %esi
+; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1BMI2-LABEL: bextr32_c3_load_indexzext:
 ; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %dl
-; X86-BMI1BMI2-NEXT:    shrxl %edx, (%ecx), %ecx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %ecx, %eax
+; X86-BMI1BMI2-NEXT:    pushl %ebx
+; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    pushl %eax
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, (%eax), %esi
+; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
+; X86-BMI1BMI2-NEXT:    negb %al
+; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
+; X86-BMI1BMI2-NEXT:    calll use32
+; X86-BMI1BMI2-NEXT:    bzhil %ebx, %esi, %eax
+; X86-BMI1BMI2-NEXT:    addl $4, %esp
+; X86-BMI1BMI2-NEXT:    popl %esi
+; X86-BMI1BMI2-NEXT:    popl %ebx
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_c3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %rbp
+; X64-NOBMI-NEXT:    pushq %rbx
+; X64-NOBMI-NEXT:    pushq %rax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    movl (%rdi), %eax
+; X64-NOBMI-NEXT:    movl (%rdi), %ebp
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    shrl %cl, %ebp
 ; X64-NOBMI-NEXT:    negb %dl
+; X64-NOBMI-NEXT:    movl $-1, %ebx
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
-; X64-NOBMI-NEXT:    shll %cl, %eax
-; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    shrl %cl, %ebx
+; X64-NOBMI-NEXT:    movl %ebx, %edi
+; X64-NOBMI-NEXT:    callq use32
+; X64-NOBMI-NEXT:    andl %ebp, %ebx
+; X64-NOBMI-NEXT:    movl %ebx, %eax
+; X64-NOBMI-NEXT:    addq $8, %rsp
+; X64-NOBMI-NEXT:    popq %rbx
+; X64-NOBMI-NEXT:    popq %rbp
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1NOTBM-LABEL: bextr32_c3_load_indexzext:
 ; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    pushq %rbp
+; X64-BMI1NOTBM-NEXT:    pushq %rbx
+; X64-BMI1NOTBM-NEXT:    pushq %rax
 ; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
-; X64-BMI1NOTBM-NEXT:    movl (%rdi), %eax
+; X64-BMI1NOTBM-NEXT:    movl (%rdi), %ebp
 ; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-BMI1NOTBM-NEXT:    shrl %cl, %eax
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    bextrl %edx, %eax, %eax
+; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebp
+; X64-BMI1NOTBM-NEXT:    negb %dl
+; X64-BMI1NOTBM-NEXT:    movl $-1, %ebx
+; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
+; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebx
+; X64-BMI1NOTBM-NEXT:    movl %ebx, %edi
+; X64-BMI1NOTBM-NEXT:    callq use32
+; X64-BMI1NOTBM-NEXT:    andl %ebp, %ebx
+; X64-BMI1NOTBM-NEXT:    movl %ebx, %eax
+; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
+; X64-BMI1NOTBM-NEXT:    popq %rbx
+; X64-BMI1NOTBM-NEXT:    popq %rbp
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bextr32_c3_load_indexzext:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxl %esi, (%rdi), %eax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI1BMI2-NEXT:    pushq %rbp
+; X64-BMI1BMI2-NEXT:    pushq %rbx
+; X64-BMI1BMI2-NEXT:    pushq %rax
+; X64-BMI1BMI2-NEXT:    movl %edx, %ebx
+; X64-BMI1BMI2-NEXT:    shrxl %esi, (%rdi), %ebp
+; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI1BMI2-NEXT:    negb %al
+; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
+; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %edi
+; X64-BMI1BMI2-NEXT:    callq use32
+; X64-BMI1BMI2-NEXT:    bzhil %ebx, %ebp, %eax
+; X64-BMI1BMI2-NEXT:    addq $8, %rsp
+; X64-BMI1BMI2-NEXT:    popq %rbx
+; X64-BMI1BMI2-NEXT:    popq %rbp
 ; X64-BMI1BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %skip = zext i8 %numskipbits to i32
@@ -3064,6 +3341,7 @@ define i32 @bextr32_c3_load_indexzext(i3
   %numhighbits = sub i8 32, %numlowbits
   %sh_prom = zext i8 %numhighbits to i32
   %mask = lshr i32 -1, %sh_prom
+  call void @use32(i32 %mask)
   %masked = and i32 %mask, %shifted
   ret i32 %masked
 }
@@ -3071,63 +3349,133 @@ define i32 @bextr32_c3_load_indexzext(i3
 define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bextr32_c4_commutative:
 ; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %edi
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    pushl %eax
 ; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    movl $-1, %esi
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    shrl %cl, %esi
+; X86-NOBMI-NEXT:    movl %esi, (%esp)
+; X86-NOBMI-NEXT:    calll use32
+; X86-NOBMI-NEXT:    andl %edi, %esi
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    addl $4, %esp
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
 ; X86-BMI1NOTBM-LABEL: bextr32_c4_commutative:
 ; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    pushl %edi
+; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    pushl %eax
 ; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, %edx, %eax
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
+; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
+; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
+; X86-BMI1NOTBM-NEXT:    movl %esi, (%esp)
+; X86-BMI1NOTBM-NEXT:    calll use32
+; X86-BMI1NOTBM-NEXT:    andl %edi, %esi
+; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    addl $4, %esp
+; X86-BMI1NOTBM-NEXT:    popl %esi
+; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1BMI2-LABEL: bextr32_c4_commutative:
 ; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %ecx, %eax
+; X86-BMI1BMI2-NEXT:    pushl %edi
+; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    pushl %eax
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %edi
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    negl %eax
+; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
+; X86-BMI1BMI2-NEXT:    calll use32
+; X86-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X86-BMI1BMI2-NEXT:    addl $4, %esp
+; X86-BMI1BMI2-NEXT:    popl %esi
+; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_c4_commutative:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %rbp
+; X64-NOBMI-NEXT:    pushq %rbx
+; X64-NOBMI-NEXT:    pushq %rax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    movl %edi, %ebx
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    shrl %cl, %ebx
 ; X64-NOBMI-NEXT:    negl %edx
+; X64-NOBMI-NEXT:    movl $-1, %ebp
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
-; X64-NOBMI-NEXT:    shll %cl, %eax
-; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    shrl %cl, %ebp
+; X64-NOBMI-NEXT:    movl %ebp, %edi
+; X64-NOBMI-NEXT:    callq use32
+; X64-NOBMI-NEXT:    andl %ebx, %ebp
+; X64-NOBMI-NEXT:    movl %ebp, %eax
+; X64-NOBMI-NEXT:    addq $8, %rsp
+; X64-NOBMI-NEXT:    popq %rbx
+; X64-NOBMI-NEXT:    popq %rbp
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1NOTBM-LABEL: bextr32_c4_commutative:
 ; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    pushq %rbp
+; X64-BMI1NOTBM-NEXT:    pushq %rbx
+; X64-BMI1NOTBM-NEXT:    pushq %rax
 ; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
+; X64-BMI1NOTBM-NEXT:    movl %edi, %ebx
 ; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    bextrl %edx, %edi, %eax
+; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebx
+; X64-BMI1NOTBM-NEXT:    negl %edx
+; X64-BMI1NOTBM-NEXT:    movl $-1, %ebp
+; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
+; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebp
+; X64-BMI1NOTBM-NEXT:    movl %ebp, %edi
+; X64-BMI1NOTBM-NEXT:    callq use32
+; X64-BMI1NOTBM-NEXT:    andl %ebx, %ebp
+; X64-BMI1NOTBM-NEXT:    movl %ebp, %eax
+; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
+; X64-BMI1NOTBM-NEXT:    popq %rbx
+; X64-BMI1NOTBM-NEXT:    popq %rbp
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bextr32_c4_commutative:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %eax
+; X64-BMI1BMI2-NEXT:    pushq %rbp
+; X64-BMI1BMI2-NEXT:    pushq %rbx
+; X64-BMI1BMI2-NEXT:    pushq %rax
+; X64-BMI1BMI2-NEXT:    movl %edx, %ebx
+; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %ebp
+; X64-BMI1BMI2-NEXT:    movl %edx, %eax
+; X64-BMI1BMI2-NEXT:    negl %eax
+; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
+; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %edi
+; X64-BMI1BMI2-NEXT:    callq use32
+; X64-BMI1BMI2-NEXT:    bzhil %ebx, %ebp, %eax
+; X64-BMI1BMI2-NEXT:    addq $8, %rsp
+; X64-BMI1BMI2-NEXT:    popq %rbx
+; X64-BMI1BMI2-NEXT:    popq %rbp
 ; X64-BMI1BMI2-NEXT:    retq
   %shifted = lshr i32 %val, %numskipbits
   %numhighbits = sub i32 32, %numlowbits
   %mask = lshr i32 -1, %numhighbits
+  call void @use32(i32 %mask)
   %masked = and i32 %shifted, %mask ; swapped order
   ret i32 %masked
 }
@@ -3135,98 +3483,156 @@ define i32 @bextr32_c4_commutative(i32 %
 define i32 @bextr32_c5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bextr32_c5_skipextrauses:
 ; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %ebx
+; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    subl $8, %esp
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    movl %eax, %ecx
-; X86-NOBMI-NEXT:    shrl %cl, %esi
+; X86-NOBMI-NEXT:    subl $16, %esp
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NOBMI-NEXT:    movl %ebx, %ecx
+; X86-NOBMI-NEXT:    shrl %cl, %edi
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    shll %cl, %esi
+; X86-NOBMI-NEXT:    movl $-1, %esi
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NOBMI-NEXT:    shrl %cl, %esi
-; X86-NOBMI-NEXT:    movl %eax, (%esp)
+; X86-NOBMI-NEXT:    movl %esi, (%esp)
+; X86-NOBMI-NEXT:    calll use32
+; X86-NOBMI-NEXT:    andl %edi, %esi
+; X86-NOBMI-NEXT:    movl %ebx, (%esp)
 ; X86-NOBMI-NEXT:    calll use32
 ; X86-NOBMI-NEXT:    movl %esi, %eax
-; X86-NOBMI-NEXT:    addl $8, %esp
+; X86-NOBMI-NEXT:    addl $16, %esp
 ; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    popl %edi
+; X86-NOBMI-NEXT:    popl %ebx
 ; X86-NOBMI-NEXT:    retl
 ;
 ; X86-BMI1NOTBM-LABEL: bextr32_c5_skipextrauses:
 ; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    pushl %ebx
+; X86-BMI1NOTBM-NEXT:    pushl %edi
 ; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    subl $8, %esp
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, %edx, %esi
-; X86-BMI1NOTBM-NEXT:    movl %ecx, (%esp)
+; X86-BMI1NOTBM-NEXT:    subl $16, %esp
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-BMI1NOTBM-NEXT:    movl %ebx, %ecx
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
+; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
+; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
+; X86-BMI1NOTBM-NEXT:    movl %esi, (%esp)
+; X86-BMI1NOTBM-NEXT:    calll use32
+; X86-BMI1NOTBM-NEXT:    andl %edi, %esi
+; X86-BMI1NOTBM-NEXT:    movl %ebx, (%esp)
 ; X86-BMI1NOTBM-NEXT:    calll use32
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
-; X86-BMI1NOTBM-NEXT:    addl $8, %esp
+; X86-BMI1NOTBM-NEXT:    addl $16, %esp
 ; X86-BMI1NOTBM-NEXT:    popl %esi
+; X86-BMI1NOTBM-NEXT:    popl %edi
+; X86-BMI1NOTBM-NEXT:    popl %ebx
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1BMI2-LABEL: bextr32_c5_skipextrauses:
 ; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    pushl %ebx
+; X86-BMI1BMI2-NEXT:    pushl %edi
 ; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    subl $8, %esp
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, %edx, %esi
-; X86-BMI1BMI2-NEXT:    movl %ecx, (%esp)
+; X86-BMI1BMI2-NEXT:    subl $16, %esp
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-BMI1BMI2-NEXT:    shrxl %edi, {{[0-9]+}}(%esp), %ebx
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    negl %eax
+; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
+; X86-BMI1BMI2-NEXT:    calll use32
+; X86-BMI1BMI2-NEXT:    bzhil %esi, %ebx, %esi
+; X86-BMI1BMI2-NEXT:    movl %edi, (%esp)
 ; X86-BMI1BMI2-NEXT:    calll use32
 ; X86-BMI1BMI2-NEXT:    movl %esi, %eax
-; X86-BMI1BMI2-NEXT:    addl $8, %esp
+; X86-BMI1BMI2-NEXT:    addl $16, %esp
 ; X86-BMI1BMI2-NEXT:    popl %esi
+; X86-BMI1BMI2-NEXT:    popl %edi
+; X86-BMI1BMI2-NEXT:    popl %ebx
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr32_c5_skipextrauses:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %rbp
+; X64-NOBMI-NEXT:    pushq %r14
 ; X64-NOBMI-NEXT:    pushq %rbx
-; X64-NOBMI-NEXT:    movl %edi, %ebx
-; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    shrl %cl, %ebx
+; X64-NOBMI-NEXT:    movl %esi, %r14d
+; X64-NOBMI-NEXT:    movl %edi, %ebp
+; X64-NOBMI-NEXT:    movl %r14d, %ecx
+; X64-NOBMI-NEXT:    shrl %cl, %ebp
 ; X64-NOBMI-NEXT:    negl %edx
+; X64-NOBMI-NEXT:    movl $-1, %ebx
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
-; X64-NOBMI-NEXT:    shll %cl, %ebx
 ; X64-NOBMI-NEXT:    shrl %cl, %ebx
-; X64-NOBMI-NEXT:    movl %esi, %edi
+; X64-NOBMI-NEXT:    movl %ebx, %edi
+; X64-NOBMI-NEXT:    callq use32
+; X64-NOBMI-NEXT:    andl %ebp, %ebx
+; X64-NOBMI-NEXT:    movl %r14d, %edi
 ; X64-NOBMI-NEXT:    callq use32
 ; X64-NOBMI-NEXT:    movl %ebx, %eax
 ; X64-NOBMI-NEXT:    popq %rbx
+; X64-NOBMI-NEXT:    popq %r14
+; X64-NOBMI-NEXT:    popq %rbp
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1NOTBM-LABEL: bextr32_c5_skipextrauses:
 ; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    pushq %rbp
+; X64-BMI1NOTBM-NEXT:    pushq %r14
 ; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
-; X64-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X64-BMI1NOTBM-NEXT:    shll $8, %edx
-; X64-BMI1NOTBM-NEXT:    bextrl %edx, %edi, %ebx
-; X64-BMI1NOTBM-NEXT:    movl %esi, %edi
+; X64-BMI1NOTBM-NEXT:    movl %esi, %r14d
+; X64-BMI1NOTBM-NEXT:    movl %edi, %ebp
+; X64-BMI1NOTBM-NEXT:    movl %r14d, %ecx
+; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebp
+; X64-BMI1NOTBM-NEXT:    negl %edx
+; X64-BMI1NOTBM-NEXT:    movl $-1, %ebx
+; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
+; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebx
+; X64-BMI1NOTBM-NEXT:    movl %ebx, %edi
+; X64-BMI1NOTBM-NEXT:    callq use32
+; X64-BMI1NOTBM-NEXT:    andl %ebp, %ebx
+; X64-BMI1NOTBM-NEXT:    movl %r14d, %edi
 ; X64-BMI1NOTBM-NEXT:    callq use32
 ; X64-BMI1NOTBM-NEXT:    movl %ebx, %eax
 ; X64-BMI1NOTBM-NEXT:    popq %rbx
+; X64-BMI1NOTBM-NEXT:    popq %r14
+; X64-BMI1NOTBM-NEXT:    popq %rbp
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bextr32_c5_skipextrauses:
 ; X64-BMI1BMI2:       # %bb.0:
+; X64-BMI1BMI2-NEXT:    pushq %rbp
+; X64-BMI1BMI2-NEXT:    pushq %r14
 ; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %eax
-; X64-BMI1BMI2-NEXT:    bzhil %edx, %eax, %ebx
-; X64-BMI1BMI2-NEXT:    movl %esi, %edi
+; X64-BMI1BMI2-NEXT:    movl %edx, %ebx
+; X64-BMI1BMI2-NEXT:    movl %esi, %ebp
+; X64-BMI1BMI2-NEXT:    shrxl %esi, %edi, %r14d
+; X64-BMI1BMI2-NEXT:    movl %edx, %eax
+; X64-BMI1BMI2-NEXT:    negl %eax
+; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
+; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %edi
+; X64-BMI1BMI2-NEXT:    callq use32
+; X64-BMI1BMI2-NEXT:    bzhil %ebx, %r14d, %ebx
+; X64-BMI1BMI2-NEXT:    movl %ebp, %edi
 ; X64-BMI1BMI2-NEXT:    callq use32
 ; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
 ; X64-BMI1BMI2-NEXT:    popq %rbx
+; X64-BMI1BMI2-NEXT:    popq %r14
+; X64-BMI1BMI2-NEXT:    popq %rbp
 ; X64-BMI1BMI2-NEXT:    retq
   %shifted = lshr i32 %val, %numskipbits
   %numhighbits = sub i32 32, %numlowbits
   %mask = lshr i32 -1, %numhighbits
+  call void @use32(i32 %mask)
   %masked = and i32 %mask, %shifted
   call void @use32(i32 %numskipbits)
   ret i32 %masked
@@ -3237,8 +3643,11 @@ define i32 @bextr32_c5_skipextrauses(i32
 define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bextr64_c0:
 ; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %ebp
+; X86-NOBMI-NEXT:    pushl %ebx
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    subl $12, %esp
 ; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -3253,26 +3662,39 @@ define i64 @bextr64_c0(i64 %val, i64 %nu
 ; X86-NOBMI-NEXT:  .LBB30_2:
 ; X86-NOBMI-NEXT:    movl $64, %ecx
 ; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    movl $-1, %eax
-; X86-NOBMI-NEXT:    movl $-1, %edx
-; X86-NOBMI-NEXT:    shrl %cl, %edx
-; X86-NOBMI-NEXT:    shrdl %cl, %eax, %eax
+; X86-NOBMI-NEXT:    movl $-1, %ebp
+; X86-NOBMI-NEXT:    movl $-1, %ebx
+; X86-NOBMI-NEXT:    shrl %cl, %ebx
+; X86-NOBMI-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    je .LBB30_4
 ; X86-NOBMI-NEXT:  # %bb.3:
-; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:    xorl %edx, %edx
+; X86-NOBMI-NEXT:    movl %ebx, %ebp
+; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:  .LBB30_4:
-; X86-NOBMI-NEXT:    andl %edi, %edx
-; X86-NOBMI-NEXT:    andl %esi, %eax
+; X86-NOBMI-NEXT:    subl $8, %esp
+; X86-NOBMI-NEXT:    pushl %ebx
+; X86-NOBMI-NEXT:    pushl %ebp
+; X86-NOBMI-NEXT:    calll use64
+; X86-NOBMI-NEXT:    addl $16, %esp
+; X86-NOBMI-NEXT:    andl %ebp, %esi
+; X86-NOBMI-NEXT:    andl %ebx, %edi
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    movl %edi, %edx
+; X86-NOBMI-NEXT:    addl $12, %esp
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
+; X86-NOBMI-NEXT:    popl %ebx
+; X86-NOBMI-NEXT:    popl %ebp
 ; X86-NOBMI-NEXT:    retl
 ;
 ; X86-BMI1NOTBM-LABEL: bextr64_c0:
 ; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    pushl %ebp
+; X86-BMI1NOTBM-NEXT:    pushl %ebx
 ; X86-BMI1NOTBM-NEXT:    pushl %edi
 ; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    subl $12, %esp
 ; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -3287,26 +3709,39 @@ define i64 @bextr64_c0(i64 %val, i64 %nu
 ; X86-BMI1NOTBM-NEXT:  .LBB30_2:
 ; X86-BMI1NOTBM-NEXT:    movl $64, %ecx
 ; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    movl $-1, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %eax
+; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
+; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
+; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    je .LBB30_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
+; X86-BMI1NOTBM-NEXT:    movl %ebx, %ebp
+; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:  .LBB30_4:
-; X86-BMI1NOTBM-NEXT:    andl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    andl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    subl $8, %esp
+; X86-BMI1NOTBM-NEXT:    pushl %ebx
+; X86-BMI1NOTBM-NEXT:    pushl %ebp
+; X86-BMI1NOTBM-NEXT:    calll use64
+; X86-BMI1NOTBM-NEXT:    addl $16, %esp
+; X86-BMI1NOTBM-NEXT:    andl %ebp, %esi
+; X86-BMI1NOTBM-NEXT:    andl %ebx, %edi
+; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
+; X86-BMI1NOTBM-NEXT:    addl $12, %esp
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
+; X86-BMI1NOTBM-NEXT:    popl %ebx
+; X86-BMI1NOTBM-NEXT:    popl %ebp
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1BMI2-LABEL: bextr64_c0:
 ; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    pushl %ebp
+; X86-BMI1BMI2-NEXT:    pushl %ebx
 ; X86-BMI1BMI2-NEXT:    pushl %edi
 ; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    subl $12, %esp
 ; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -3320,50 +3755,96 @@ define i64 @bextr64_c0(i64 %val, i64 %nu
 ; X86-BMI1BMI2-NEXT:  .LBB30_2:
 ; X86-BMI1BMI2-NEXT:    movl $64, %ecx
 ; X86-BMI1BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    movl $-1, %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edx
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
+; X86-BMI1BMI2-NEXT:    movl $-1, %ebx
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, %ebx, %ebp
+; X86-BMI1BMI2-NEXT:    shrdl %cl, %ebx, %ebx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
 ; X86-BMI1BMI2-NEXT:    je .LBB30_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %edx, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI1BMI2-NEXT:    movl %ebp, %ebx
+; X86-BMI1BMI2-NEXT:    xorl %ebp, %ebp
 ; X86-BMI1BMI2-NEXT:  .LBB30_4:
-; X86-BMI1BMI2-NEXT:    andl %edi, %edx
-; X86-BMI1BMI2-NEXT:    andl %esi, %eax
+; X86-BMI1BMI2-NEXT:    subl $8, %esp
+; X86-BMI1BMI2-NEXT:    pushl %ebp
+; X86-BMI1BMI2-NEXT:    pushl %ebx
+; X86-BMI1BMI2-NEXT:    calll use64
+; X86-BMI1BMI2-NEXT:    addl $16, %esp
+; X86-BMI1BMI2-NEXT:    andl %ebx, %esi
+; X86-BMI1BMI2-NEXT:    andl %ebp, %edi
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    movl %edi, %edx
+; X86-BMI1BMI2-NEXT:    addl $12, %esp
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
+; X86-BMI1BMI2-NEXT:    popl %ebx
+; X86-BMI1BMI2-NEXT:    popl %ebp
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_c0:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %r14
+; X64-NOBMI-NEXT:    pushq %rbx
+; X64-NOBMI-NEXT:    pushq %rax
 ; X64-NOBMI-NEXT:    movq %rsi, %rcx
-; X64-NOBMI-NEXT:    movq %rdi, %rax
+; X64-NOBMI-NEXT:    movq %rdi, %r14
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    shrq %cl, %r14
 ; X64-NOBMI-NEXT:    negl %edx
+; X64-NOBMI-NEXT:    movq $-1, %rbx
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
-; X64-NOBMI-NEXT:    shlq %cl, %rax
-; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    shrq %cl, %rbx
+; X64-NOBMI-NEXT:    movq %rbx, %rdi
+; X64-NOBMI-NEXT:    callq use64
+; X64-NOBMI-NEXT:    andq %r14, %rbx
+; X64-NOBMI-NEXT:    movq %rbx, %rax
+; X64-NOBMI-NEXT:    addq $8, %rsp
+; X64-NOBMI-NEXT:    popq %rbx
+; X64-NOBMI-NEXT:    popq %r14
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1NOTBM-LABEL: bextr64_c0:
 ; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    pushq %r14
+; X64-BMI1NOTBM-NEXT:    pushq %rbx
+; X64-BMI1NOTBM-NEXT:    pushq %rax
 ; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
+; X64-BMI1NOTBM-NEXT:    movq %rdi, %r14
 ; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %rdi
-; X64-BMI1NOTBM-NEXT:    shlq $8, %rdx
-; X64-BMI1NOTBM-NEXT:    bextrq %rdx, %rdi, %rax
+; X64-BMI1NOTBM-NEXT:    shrq %cl, %r14
+; X64-BMI1NOTBM-NEXT:    negl %edx
+; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
+; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
+; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
+; X64-BMI1NOTBM-NEXT:    movq %rbx, %rdi
+; X64-BMI1NOTBM-NEXT:    callq use64
+; X64-BMI1NOTBM-NEXT:    andq %r14, %rbx
+; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
+; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
+; X64-BMI1NOTBM-NEXT:    popq %rbx
+; X64-BMI1NOTBM-NEXT:    popq %r14
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bextr64_c0:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rax
+; X64-BMI1BMI2-NEXT:    pushq %r14
+; X64-BMI1BMI2-NEXT:    pushq %rbx
+; X64-BMI1BMI2-NEXT:    pushq %rax
+; X64-BMI1BMI2-NEXT:    movq %rdx, %rbx
+; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %r14
+; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI1BMI2-NEXT:    negl %eax
+; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
+; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI1BMI2-NEXT:    callq use64
+; X64-BMI1BMI2-NEXT:    bzhiq %rbx, %r14, %rax
+; X64-BMI1BMI2-NEXT:    addq $8, %rsp
+; X64-BMI1BMI2-NEXT:    popq %rbx
+; X64-BMI1BMI2-NEXT:    popq %r14
 ; X64-BMI1BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %numhighbits = sub i64 64, %numlowbits
   %mask = lshr i64 -1, %numhighbits
+  call void @use64(i64 %mask)
   %masked = and i64 %mask, %shifted
   ret i64 %masked
 }
@@ -3371,8 +3852,11 @@ define i64 @bextr64_c0(i64 %val, i64 %nu
 define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bextr64_c1_indexzext:
 ; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %ebp
+; X86-NOBMI-NEXT:    pushl %ebx
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    subl $12, %esp
 ; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -3387,26 +3871,39 @@ define i64 @bextr64_c1_indexzext(i64 %va
 ; X86-NOBMI-NEXT:  .LBB31_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    movl $-1, %eax
-; X86-NOBMI-NEXT:    movl $-1, %edx
-; X86-NOBMI-NEXT:    shrl %cl, %edx
-; X86-NOBMI-NEXT:    shrdl %cl, %eax, %eax
+; X86-NOBMI-NEXT:    movl $-1, %ebp
+; X86-NOBMI-NEXT:    movl $-1, %ebx
+; X86-NOBMI-NEXT:    shrl %cl, %ebx
+; X86-NOBMI-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    je .LBB31_4
 ; X86-NOBMI-NEXT:  # %bb.3:
-; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:    xorl %edx, %edx
+; X86-NOBMI-NEXT:    movl %ebx, %ebp
+; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:  .LBB31_4:
-; X86-NOBMI-NEXT:    andl %edi, %edx
-; X86-NOBMI-NEXT:    andl %esi, %eax
+; X86-NOBMI-NEXT:    subl $8, %esp
+; X86-NOBMI-NEXT:    pushl %ebx
+; X86-NOBMI-NEXT:    pushl %ebp
+; X86-NOBMI-NEXT:    calll use64
+; X86-NOBMI-NEXT:    addl $16, %esp
+; X86-NOBMI-NEXT:    andl %ebp, %esi
+; X86-NOBMI-NEXT:    andl %ebx, %edi
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    movl %edi, %edx
+; X86-NOBMI-NEXT:    addl $12, %esp
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
+; X86-NOBMI-NEXT:    popl %ebx
+; X86-NOBMI-NEXT:    popl %ebp
 ; X86-NOBMI-NEXT:    retl
 ;
 ; X86-BMI1NOTBM-LABEL: bextr64_c1_indexzext:
 ; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    pushl %ebp
+; X86-BMI1NOTBM-NEXT:    pushl %ebx
 ; X86-BMI1NOTBM-NEXT:    pushl %edi
 ; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    subl $12, %esp
 ; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -3421,26 +3918,39 @@ define i64 @bextr64_c1_indexzext(i64 %va
 ; X86-BMI1NOTBM-NEXT:  .LBB31_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    movl $-1, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %eax
+; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
+; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
+; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    je .LBB31_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
+; X86-BMI1NOTBM-NEXT:    movl %ebx, %ebp
+; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:  .LBB31_4:
-; X86-BMI1NOTBM-NEXT:    andl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    andl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    subl $8, %esp
+; X86-BMI1NOTBM-NEXT:    pushl %ebx
+; X86-BMI1NOTBM-NEXT:    pushl %ebp
+; X86-BMI1NOTBM-NEXT:    calll use64
+; X86-BMI1NOTBM-NEXT:    addl $16, %esp
+; X86-BMI1NOTBM-NEXT:    andl %ebp, %esi
+; X86-BMI1NOTBM-NEXT:    andl %ebx, %edi
+; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
+; X86-BMI1NOTBM-NEXT:    addl $12, %esp
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
+; X86-BMI1NOTBM-NEXT:    popl %ebx
+; X86-BMI1NOTBM-NEXT:    popl %ebp
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1BMI2-LABEL: bextr64_c1_indexzext:
 ; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    pushl %ebp
+; X86-BMI1BMI2-NEXT:    pushl %ebx
 ; X86-BMI1BMI2-NEXT:    pushl %edi
 ; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    subl $12, %esp
 ; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -3454,55 +3964,99 @@ define i64 @bextr64_c1_indexzext(i64 %va
 ; X86-BMI1BMI2-NEXT:  .LBB31_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl $-1, %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edx
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
+; X86-BMI1BMI2-NEXT:    movl $-1, %ebx
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, %ebx, %ebp
+; X86-BMI1BMI2-NEXT:    shrdl %cl, %ebx, %ebx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
 ; X86-BMI1BMI2-NEXT:    je .LBB31_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %edx, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI1BMI2-NEXT:    movl %ebp, %ebx
+; X86-BMI1BMI2-NEXT:    xorl %ebp, %ebp
 ; X86-BMI1BMI2-NEXT:  .LBB31_4:
-; X86-BMI1BMI2-NEXT:    andl %edi, %edx
-; X86-BMI1BMI2-NEXT:    andl %esi, %eax
+; X86-BMI1BMI2-NEXT:    subl $8, %esp
+; X86-BMI1BMI2-NEXT:    pushl %ebp
+; X86-BMI1BMI2-NEXT:    pushl %ebx
+; X86-BMI1BMI2-NEXT:    calll use64
+; X86-BMI1BMI2-NEXT:    addl $16, %esp
+; X86-BMI1BMI2-NEXT:    andl %ebx, %esi
+; X86-BMI1BMI2-NEXT:    andl %ebp, %edi
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    movl %edi, %edx
+; X86-BMI1BMI2-NEXT:    addl $12, %esp
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
+; X86-BMI1BMI2-NEXT:    popl %ebx
+; X86-BMI1BMI2-NEXT:    popl %ebp
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_c1_indexzext:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %r14
+; X64-NOBMI-NEXT:    pushq %rbx
+; X64-NOBMI-NEXT:    pushq %rax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    movq %rdi, %rax
+; X64-NOBMI-NEXT:    movq %rdi, %r14
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    shrq %cl, %r14
 ; X64-NOBMI-NEXT:    negb %dl
+; X64-NOBMI-NEXT:    movq $-1, %rbx
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
-; X64-NOBMI-NEXT:    shlq %cl, %rax
-; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    shrq %cl, %rbx
+; X64-NOBMI-NEXT:    movq %rbx, %rdi
+; X64-NOBMI-NEXT:    callq use64
+; X64-NOBMI-NEXT:    andq %r14, %rbx
+; X64-NOBMI-NEXT:    movq %rbx, %rax
+; X64-NOBMI-NEXT:    addq $8, %rsp
+; X64-NOBMI-NEXT:    popq %rbx
+; X64-NOBMI-NEXT:    popq %r14
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1NOTBM-LABEL: bextr64_c1_indexzext:
 ; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-BMI1NOTBM-NEXT:    pushq %r14
+; X64-BMI1NOTBM-NEXT:    pushq %rbx
+; X64-BMI1NOTBM-NEXT:    pushq %rax
 ; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
+; X64-BMI1NOTBM-NEXT:    movq %rdi, %r14
 ; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %rdi
-; X64-BMI1NOTBM-NEXT:    shlq $8, %rdx
-; X64-BMI1NOTBM-NEXT:    bextrq %rdx, %rdi, %rax
+; X64-BMI1NOTBM-NEXT:    shrq %cl, %r14
+; X64-BMI1NOTBM-NEXT:    negb %dl
+; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
+; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
+; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
+; X64-BMI1NOTBM-NEXT:    movq %rbx, %rdi
+; X64-BMI1NOTBM-NEXT:    callq use64
+; X64-BMI1NOTBM-NEXT:    andq %r14, %rbx
+; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
+; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
+; X64-BMI1NOTBM-NEXT:    popq %rbx
+; X64-BMI1NOTBM-NEXT:    popq %r14
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bextr64_c1_indexzext:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-BMI1BMI2-NEXT:    pushq %r14
+; X64-BMI1BMI2-NEXT:    pushq %rbx
+; X64-BMI1BMI2-NEXT:    pushq %rax
+; X64-BMI1BMI2-NEXT:    movl %edx, %ebx
 ; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rax
+; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %r14
+; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI1BMI2-NEXT:    negb %al
+; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
+; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI1BMI2-NEXT:    callq use64
+; X64-BMI1BMI2-NEXT:    bzhiq %rbx, %r14, %rax
+; X64-BMI1BMI2-NEXT:    addq $8, %rsp
+; X64-BMI1BMI2-NEXT:    popq %rbx
+; X64-BMI1BMI2-NEXT:    popq %r14
 ; X64-BMI1BMI2-NEXT:    retq
   %skip = zext i8 %numskipbits to i64
   %shifted = lshr i64 %val, %skip
   %numhighbits = sub i8 64, %numlowbits
   %sh_prom = zext i8 %numhighbits to i64
   %mask = lshr i64 -1, %sh_prom
+  call void @use64(i64 %mask)
   %masked = and i64 %mask, %shifted
   ret i64 %masked
 }
@@ -3510,8 +4064,11 @@ define i64 @bextr64_c1_indexzext(i64 %va
 define i64 @bextr64_c2_load(i64* %w, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bextr64_c2_load:
 ; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %ebp
+; X86-NOBMI-NEXT:    pushl %ebx
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    subl $12, %esp
 ; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl (%eax), %esi
@@ -3527,26 +4084,39 @@ define i64 @bextr64_c2_load(i64* %w, i64
 ; X86-NOBMI-NEXT:  .LBB32_2:
 ; X86-NOBMI-NEXT:    movl $64, %ecx
 ; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    movl $-1, %eax
-; X86-NOBMI-NEXT:    movl $-1, %edx
-; X86-NOBMI-NEXT:    shrl %cl, %edx
-; X86-NOBMI-NEXT:    shrdl %cl, %eax, %eax
+; X86-NOBMI-NEXT:    movl $-1, %ebp
+; X86-NOBMI-NEXT:    movl $-1, %ebx
+; X86-NOBMI-NEXT:    shrl %cl, %ebx
+; X86-NOBMI-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    je .LBB32_4
 ; X86-NOBMI-NEXT:  # %bb.3:
-; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:    xorl %edx, %edx
+; X86-NOBMI-NEXT:    movl %ebx, %ebp
+; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:  .LBB32_4:
-; X86-NOBMI-NEXT:    andl %edi, %edx
-; X86-NOBMI-NEXT:    andl %esi, %eax
+; X86-NOBMI-NEXT:    subl $8, %esp
+; X86-NOBMI-NEXT:    pushl %ebx
+; X86-NOBMI-NEXT:    pushl %ebp
+; X86-NOBMI-NEXT:    calll use64
+; X86-NOBMI-NEXT:    addl $16, %esp
+; X86-NOBMI-NEXT:    andl %ebp, %esi
+; X86-NOBMI-NEXT:    andl %ebx, %edi
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    movl %edi, %edx
+; X86-NOBMI-NEXT:    addl $12, %esp
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
+; X86-NOBMI-NEXT:    popl %ebx
+; X86-NOBMI-NEXT:    popl %ebp
 ; X86-NOBMI-NEXT:    retl
 ;
 ; X86-BMI1NOTBM-LABEL: bextr64_c2_load:
 ; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    pushl %ebp
+; X86-BMI1NOTBM-NEXT:    pushl %ebx
 ; X86-BMI1NOTBM-NEXT:    pushl %edi
 ; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    subl $12, %esp
 ; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1NOTBM-NEXT:    movl (%eax), %esi
@@ -3562,26 +4132,39 @@ define i64 @bextr64_c2_load(i64* %w, i64
 ; X86-BMI1NOTBM-NEXT:  .LBB32_2:
 ; X86-BMI1NOTBM-NEXT:    movl $64, %ecx
 ; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    movl $-1, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %eax
+; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
+; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
+; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    je .LBB32_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
+; X86-BMI1NOTBM-NEXT:    movl %ebx, %ebp
+; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:  .LBB32_4:
-; X86-BMI1NOTBM-NEXT:    andl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    andl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    subl $8, %esp
+; X86-BMI1NOTBM-NEXT:    pushl %ebx
+; X86-BMI1NOTBM-NEXT:    pushl %ebp
+; X86-BMI1NOTBM-NEXT:    calll use64
+; X86-BMI1NOTBM-NEXT:    addl $16, %esp
+; X86-BMI1NOTBM-NEXT:    andl %ebp, %esi
+; X86-BMI1NOTBM-NEXT:    andl %ebx, %edi
+; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
+; X86-BMI1NOTBM-NEXT:    addl $12, %esp
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
+; X86-BMI1NOTBM-NEXT:    popl %ebx
+; X86-BMI1NOTBM-NEXT:    popl %ebp
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1BMI2-LABEL: bextr64_c2_load:
 ; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    pushl %ebp
+; X86-BMI1BMI2-NEXT:    pushl %ebx
 ; X86-BMI1BMI2-NEXT:    pushl %edi
 ; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    subl $12, %esp
 ; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    movl (%eax), %esi
@@ -3596,52 +4179,97 @@ define i64 @bextr64_c2_load(i64* %w, i64
 ; X86-BMI1BMI2-NEXT:  .LBB32_2:
 ; X86-BMI1BMI2-NEXT:    movl $64, %ecx
 ; X86-BMI1BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    movl $-1, %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edx
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
+; X86-BMI1BMI2-NEXT:    movl $-1, %ebx
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, %ebx, %ebp
+; X86-BMI1BMI2-NEXT:    shrdl %cl, %ebx, %ebx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
 ; X86-BMI1BMI2-NEXT:    je .LBB32_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %edx, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI1BMI2-NEXT:    movl %ebp, %ebx
+; X86-BMI1BMI2-NEXT:    xorl %ebp, %ebp
 ; X86-BMI1BMI2-NEXT:  .LBB32_4:
-; X86-BMI1BMI2-NEXT:    andl %edi, %edx
-; X86-BMI1BMI2-NEXT:    andl %esi, %eax
+; X86-BMI1BMI2-NEXT:    subl $8, %esp
+; X86-BMI1BMI2-NEXT:    pushl %ebp
+; X86-BMI1BMI2-NEXT:    pushl %ebx
+; X86-BMI1BMI2-NEXT:    calll use64
+; X86-BMI1BMI2-NEXT:    addl $16, %esp
+; X86-BMI1BMI2-NEXT:    andl %ebx, %esi
+; X86-BMI1BMI2-NEXT:    andl %ebp, %edi
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    movl %edi, %edx
+; X86-BMI1BMI2-NEXT:    addl $12, %esp
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
+; X86-BMI1BMI2-NEXT:    popl %ebx
+; X86-BMI1BMI2-NEXT:    popl %ebp
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_c2_load:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %r14
+; X64-NOBMI-NEXT:    pushq %rbx
+; X64-NOBMI-NEXT:    pushq %rax
 ; X64-NOBMI-NEXT:    movq %rsi, %rcx
-; X64-NOBMI-NEXT:    movq (%rdi), %rax
+; X64-NOBMI-NEXT:    movq (%rdi), %r14
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    shrq %cl, %r14
 ; X64-NOBMI-NEXT:    negl %edx
+; X64-NOBMI-NEXT:    movq $-1, %rbx
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
-; X64-NOBMI-NEXT:    shlq %cl, %rax
-; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    shrq %cl, %rbx
+; X64-NOBMI-NEXT:    movq %rbx, %rdi
+; X64-NOBMI-NEXT:    callq use64
+; X64-NOBMI-NEXT:    andq %r14, %rbx
+; X64-NOBMI-NEXT:    movq %rbx, %rax
+; X64-NOBMI-NEXT:    addq $8, %rsp
+; X64-NOBMI-NEXT:    popq %rbx
+; X64-NOBMI-NEXT:    popq %r14
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1NOTBM-LABEL: bextr64_c2_load:
 ; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    pushq %r14
+; X64-BMI1NOTBM-NEXT:    pushq %rbx
+; X64-BMI1NOTBM-NEXT:    pushq %rax
 ; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
-; X64-BMI1NOTBM-NEXT:    movq (%rdi), %rax
+; X64-BMI1NOTBM-NEXT:    movq (%rdi), %r14
 ; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %rax
-; X64-BMI1NOTBM-NEXT:    shlq $8, %rdx
-; X64-BMI1NOTBM-NEXT:    bextrq %rdx, %rax, %rax
+; X64-BMI1NOTBM-NEXT:    shrq %cl, %r14
+; X64-BMI1NOTBM-NEXT:    negl %edx
+; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
+; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
+; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
+; X64-BMI1NOTBM-NEXT:    movq %rbx, %rdi
+; X64-BMI1NOTBM-NEXT:    callq use64
+; X64-BMI1NOTBM-NEXT:    andq %r14, %rbx
+; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
+; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
+; X64-BMI1NOTBM-NEXT:    popq %rbx
+; X64-BMI1NOTBM-NEXT:    popq %r14
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bextr64_c2_load:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, (%rdi), %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rax
+; X64-BMI1BMI2-NEXT:    pushq %r14
+; X64-BMI1BMI2-NEXT:    pushq %rbx
+; X64-BMI1BMI2-NEXT:    pushq %rax
+; X64-BMI1BMI2-NEXT:    movq %rdx, %rbx
+; X64-BMI1BMI2-NEXT:    shrxq %rsi, (%rdi), %r14
+; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI1BMI2-NEXT:    negl %eax
+; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
+; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI1BMI2-NEXT:    callq use64
+; X64-BMI1BMI2-NEXT:    bzhiq %rbx, %r14, %rax
+; X64-BMI1BMI2-NEXT:    addq $8, %rsp
+; X64-BMI1BMI2-NEXT:    popq %rbx
+; X64-BMI1BMI2-NEXT:    popq %r14
 ; X64-BMI1BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %shifted = lshr i64 %val, %numskipbits
   %numhighbits = sub i64 64, %numlowbits
   %mask = lshr i64 -1, %numhighbits
+  call void @use64(i64 %mask)
   %masked = and i64 %mask, %shifted
   ret i64 %masked
 }
@@ -3649,8 +4277,11 @@ define i64 @bextr64_c2_load(i64* %w, i64
 define i64 @bextr64_c3_load_indexzext(i64* %w, i8 %numskipbits, i8 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bextr64_c3_load_indexzext:
 ; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %ebp
+; X86-NOBMI-NEXT:    pushl %ebx
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    subl $12, %esp
 ; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl (%eax), %esi
@@ -3666,26 +4297,39 @@ define i64 @bextr64_c3_load_indexzext(i6
 ; X86-NOBMI-NEXT:  .LBB33_2:
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    movl $-1, %eax
-; X86-NOBMI-NEXT:    movl $-1, %edx
-; X86-NOBMI-NEXT:    shrl %cl, %edx
-; X86-NOBMI-NEXT:    shrdl %cl, %eax, %eax
+; X86-NOBMI-NEXT:    movl $-1, %ebp
+; X86-NOBMI-NEXT:    movl $-1, %ebx
+; X86-NOBMI-NEXT:    shrl %cl, %ebx
+; X86-NOBMI-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    je .LBB33_4
 ; X86-NOBMI-NEXT:  # %bb.3:
-; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:    xorl %edx, %edx
+; X86-NOBMI-NEXT:    movl %ebx, %ebp
+; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:  .LBB33_4:
-; X86-NOBMI-NEXT:    andl %edi, %edx
-; X86-NOBMI-NEXT:    andl %esi, %eax
+; X86-NOBMI-NEXT:    subl $8, %esp
+; X86-NOBMI-NEXT:    pushl %ebx
+; X86-NOBMI-NEXT:    pushl %ebp
+; X86-NOBMI-NEXT:    calll use64
+; X86-NOBMI-NEXT:    addl $16, %esp
+; X86-NOBMI-NEXT:    andl %ebp, %esi
+; X86-NOBMI-NEXT:    andl %ebx, %edi
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    movl %edi, %edx
+; X86-NOBMI-NEXT:    addl $12, %esp
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
+; X86-NOBMI-NEXT:    popl %ebx
+; X86-NOBMI-NEXT:    popl %ebp
 ; X86-NOBMI-NEXT:    retl
 ;
 ; X86-BMI1NOTBM-LABEL: bextr64_c3_load_indexzext:
 ; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    pushl %ebp
+; X86-BMI1NOTBM-NEXT:    pushl %ebx
 ; X86-BMI1NOTBM-NEXT:    pushl %edi
 ; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    subl $12, %esp
 ; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1NOTBM-NEXT:    movl (%eax), %esi
@@ -3701,26 +4345,39 @@ define i64 @bextr64_c3_load_indexzext(i6
 ; X86-BMI1NOTBM-NEXT:  .LBB33_2:
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    movl $-1, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %eax
+; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
+; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
+; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    je .LBB33_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
+; X86-BMI1NOTBM-NEXT:    movl %ebx, %ebp
+; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:  .LBB33_4:
-; X86-BMI1NOTBM-NEXT:    andl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    andl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    subl $8, %esp
+; X86-BMI1NOTBM-NEXT:    pushl %ebx
+; X86-BMI1NOTBM-NEXT:    pushl %ebp
+; X86-BMI1NOTBM-NEXT:    calll use64
+; X86-BMI1NOTBM-NEXT:    addl $16, %esp
+; X86-BMI1NOTBM-NEXT:    andl %ebp, %esi
+; X86-BMI1NOTBM-NEXT:    andl %ebx, %edi
+; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
+; X86-BMI1NOTBM-NEXT:    addl $12, %esp
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
+; X86-BMI1NOTBM-NEXT:    popl %ebx
+; X86-BMI1NOTBM-NEXT:    popl %ebp
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1BMI2-LABEL: bextr64_c3_load_indexzext:
 ; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    pushl %ebp
+; X86-BMI1BMI2-NEXT:    pushl %ebx
 ; X86-BMI1BMI2-NEXT:    pushl %edi
 ; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    subl $12, %esp
 ; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    movl (%eax), %esi
@@ -3735,50 +4392,92 @@ define i64 @bextr64_c3_load_indexzext(i6
 ; X86-BMI1BMI2-NEXT:  .LBB33_2:
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl $-1, %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edx
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
+; X86-BMI1BMI2-NEXT:    movl $-1, %ebx
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, %ebx, %ebp
+; X86-BMI1BMI2-NEXT:    shrdl %cl, %ebx, %ebx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
 ; X86-BMI1BMI2-NEXT:    je .LBB33_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %edx, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI1BMI2-NEXT:    movl %ebp, %ebx
+; X86-BMI1BMI2-NEXT:    xorl %ebp, %ebp
 ; X86-BMI1BMI2-NEXT:  .LBB33_4:
-; X86-BMI1BMI2-NEXT:    andl %edi, %edx
-; X86-BMI1BMI2-NEXT:    andl %esi, %eax
+; X86-BMI1BMI2-NEXT:    subl $8, %esp
+; X86-BMI1BMI2-NEXT:    pushl %ebp
+; X86-BMI1BMI2-NEXT:    pushl %ebx
+; X86-BMI1BMI2-NEXT:    calll use64
+; X86-BMI1BMI2-NEXT:    addl $16, %esp
+; X86-BMI1BMI2-NEXT:    andl %ebx, %esi
+; X86-BMI1BMI2-NEXT:    andl %ebp, %edi
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    movl %edi, %edx
+; X86-BMI1BMI2-NEXT:    addl $12, %esp
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
+; X86-BMI1BMI2-NEXT:    popl %ebx
+; X86-BMI1BMI2-NEXT:    popl %ebp
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_c3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %r14
+; X64-NOBMI-NEXT:    pushq %rbx
+; X64-NOBMI-NEXT:    pushq %rax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    movq (%rdi), %rax
+; X64-NOBMI-NEXT:    movq (%rdi), %r14
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    shrq %cl, %r14
 ; X64-NOBMI-NEXT:    negb %dl
+; X64-NOBMI-NEXT:    movq $-1, %rbx
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
-; X64-NOBMI-NEXT:    shlq %cl, %rax
-; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    shrq %cl, %rbx
+; X64-NOBMI-NEXT:    movq %rbx, %rdi
+; X64-NOBMI-NEXT:    callq use64
+; X64-NOBMI-NEXT:    andq %r14, %rbx
+; X64-NOBMI-NEXT:    movq %rbx, %rax
+; X64-NOBMI-NEXT:    addq $8, %rsp
+; X64-NOBMI-NEXT:    popq %rbx
+; X64-NOBMI-NEXT:    popq %r14
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1NOTBM-LABEL: bextr64_c3_load_indexzext:
 ; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-BMI1NOTBM-NEXT:    pushq %r14
+; X64-BMI1NOTBM-NEXT:    pushq %rbx
+; X64-BMI1NOTBM-NEXT:    pushq %rax
 ; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
-; X64-BMI1NOTBM-NEXT:    movq (%rdi), %rax
+; X64-BMI1NOTBM-NEXT:    movq (%rdi), %r14
 ; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %rax
-; X64-BMI1NOTBM-NEXT:    shlq $8, %rdx
-; X64-BMI1NOTBM-NEXT:    bextrq %rdx, %rax, %rax
+; X64-BMI1NOTBM-NEXT:    shrq %cl, %r14
+; X64-BMI1NOTBM-NEXT:    negb %dl
+; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
+; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
+; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
+; X64-BMI1NOTBM-NEXT:    movq %rbx, %rdi
+; X64-BMI1NOTBM-NEXT:    callq use64
+; X64-BMI1NOTBM-NEXT:    andq %r14, %rbx
+; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
+; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
+; X64-BMI1NOTBM-NEXT:    popq %rbx
+; X64-BMI1NOTBM-NEXT:    popq %r14
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bextr64_c3_load_indexzext:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    # kill: def $edx killed $edx def $rdx
+; X64-BMI1BMI2-NEXT:    pushq %r14
+; X64-BMI1BMI2-NEXT:    pushq %rbx
+; X64-BMI1BMI2-NEXT:    pushq %rax
+; X64-BMI1BMI2-NEXT:    movl %edx, %ebx
 ; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, (%rdi), %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rax
+; X64-BMI1BMI2-NEXT:    shrxq %rsi, (%rdi), %r14
+; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI1BMI2-NEXT:    negb %al
+; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
+; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI1BMI2-NEXT:    callq use64
+; X64-BMI1BMI2-NEXT:    bzhiq %rbx, %r14, %rax
+; X64-BMI1BMI2-NEXT:    addq $8, %rsp
+; X64-BMI1BMI2-NEXT:    popq %rbx
+; X64-BMI1BMI2-NEXT:    popq %r14
 ; X64-BMI1BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %skip = zext i8 %numskipbits to i64
@@ -3786,6 +4485,7 @@ define i64 @bextr64_c3_load_indexzext(i6
   %numhighbits = sub i8 64, %numlowbits
   %sh_prom = zext i8 %numhighbits to i64
   %mask = lshr i64 -1, %sh_prom
+  call void @use64(i64 %mask)
   %masked = and i64 %mask, %shifted
   ret i64 %masked
 }
@@ -3793,133 +4493,208 @@ define i64 @bextr64_c3_load_indexzext(i6
 define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bextr64_c4_commutative:
 ; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %ebp
+; X86-NOBMI-NEXT:    pushl %ebx
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    subl $12, %esp
 ; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI-NEXT:    movl %esi, %edx
-; X86-NOBMI-NEXT:    shrl %cl, %edx
-; X86-NOBMI-NEXT:    shrdl %cl, %esi, %eax
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    movl %eax, %edi
+; X86-NOBMI-NEXT:    shrl %cl, %edi
+; X86-NOBMI-NEXT:    shrdl %cl, %eax, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    je .LBB34_2
 ; X86-NOBMI-NEXT:  # %bb.1:
-; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:    xorl %edx, %edx
+; X86-NOBMI-NEXT:    movl %edi, %esi
+; X86-NOBMI-NEXT:    xorl %edi, %edi
 ; X86-NOBMI-NEXT:  .LBB34_2:
 ; X86-NOBMI-NEXT:    movl $64, %ecx
 ; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    movl $-1, %esi
-; X86-NOBMI-NEXT:    movl $-1, %edi
-; X86-NOBMI-NEXT:    shrl %cl, %edi
-; X86-NOBMI-NEXT:    shrdl %cl, %esi, %esi
+; X86-NOBMI-NEXT:    movl $-1, %ebp
+; X86-NOBMI-NEXT:    movl $-1, %ebx
+; X86-NOBMI-NEXT:    shrl %cl, %ebx
+; X86-NOBMI-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    je .LBB34_4
 ; X86-NOBMI-NEXT:  # %bb.3:
-; X86-NOBMI-NEXT:    movl %edi, %esi
-; X86-NOBMI-NEXT:    xorl %edi, %edi
+; X86-NOBMI-NEXT:    movl %ebx, %ebp
+; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:  .LBB34_4:
-; X86-NOBMI-NEXT:    andl %edi, %edx
-; X86-NOBMI-NEXT:    andl %esi, %eax
+; X86-NOBMI-NEXT:    subl $8, %esp
+; X86-NOBMI-NEXT:    pushl %ebx
+; X86-NOBMI-NEXT:    pushl %ebp
+; X86-NOBMI-NEXT:    calll use64
+; X86-NOBMI-NEXT:    addl $16, %esp
+; X86-NOBMI-NEXT:    andl %ebp, %esi
+; X86-NOBMI-NEXT:    andl %ebx, %edi
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    movl %edi, %edx
+; X86-NOBMI-NEXT:    addl $12, %esp
 ; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    popl %edi
+; X86-NOBMI-NEXT:    popl %ebx
+; X86-NOBMI-NEXT:    popl %ebp
 ; X86-NOBMI-NEXT:    retl
 ;
 ; X86-BMI1NOTBM-LABEL: bextr64_c4_commutative:
 ; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    pushl %ebp
+; X86-BMI1NOTBM-NEXT:    pushl %ebx
 ; X86-BMI1NOTBM-NEXT:    pushl %edi
 ; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    subl $12, %esp
 ; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1NOTBM-NEXT:    movl %esi, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %eax
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
+; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    je .LBB34_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
+; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
+; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
 ; X86-BMI1NOTBM-NEXT:  .LBB34_2:
 ; X86-BMI1NOTBM-NEXT:    movl $64, %ecx
 ; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
-; X86-BMI1NOTBM-NEXT:    movl $-1, %edi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %esi
+; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
+; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
+; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    je .LBB34_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
+; X86-BMI1NOTBM-NEXT:    movl %ebx, %ebp
+; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:  .LBB34_4:
-; X86-BMI1NOTBM-NEXT:    andl %edi, %edx
-; X86-BMI1NOTBM-NEXT:    andl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    subl $8, %esp
+; X86-BMI1NOTBM-NEXT:    pushl %ebx
+; X86-BMI1NOTBM-NEXT:    pushl %ebp
+; X86-BMI1NOTBM-NEXT:    calll use64
+; X86-BMI1NOTBM-NEXT:    addl $16, %esp
+; X86-BMI1NOTBM-NEXT:    andl %ebp, %esi
+; X86-BMI1NOTBM-NEXT:    andl %ebx, %edi
+; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
+; X86-BMI1NOTBM-NEXT:    addl $12, %esp
 ; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    popl %edi
+; X86-BMI1NOTBM-NEXT:    popl %ebx
+; X86-BMI1NOTBM-NEXT:    popl %ebp
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1BMI2-LABEL: bextr64_c4_commutative:
 ; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    pushl %ebp
+; X86-BMI1BMI2-NEXT:    pushl %ebx
 ; X86-BMI1BMI2-NEXT:    pushl %edi
 ; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    subl $12, %esp
 ; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %edx
+; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
 ; X86-BMI1BMI2-NEXT:    je .LBB34_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edx, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI1BMI2-NEXT:    movl %edi, %esi
+; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
 ; X86-BMI1BMI2-NEXT:  .LBB34_2:
 ; X86-BMI1BMI2-NEXT:    movl $64, %ecx
 ; X86-BMI1BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    movl $-1, %esi
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edi
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %esi
+; X86-BMI1BMI2-NEXT:    movl $-1, %ebx
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, %ebx, %ebp
+; X86-BMI1BMI2-NEXT:    shrdl %cl, %ebx, %ebx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
 ; X86-BMI1BMI2-NEXT:    je .LBB34_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
+; X86-BMI1BMI2-NEXT:    movl %ebp, %ebx
+; X86-BMI1BMI2-NEXT:    xorl %ebp, %ebp
 ; X86-BMI1BMI2-NEXT:  .LBB34_4:
-; X86-BMI1BMI2-NEXT:    andl %edi, %edx
-; X86-BMI1BMI2-NEXT:    andl %esi, %eax
+; X86-BMI1BMI2-NEXT:    subl $8, %esp
+; X86-BMI1BMI2-NEXT:    pushl %ebp
+; X86-BMI1BMI2-NEXT:    pushl %ebx
+; X86-BMI1BMI2-NEXT:    calll use64
+; X86-BMI1BMI2-NEXT:    addl $16, %esp
+; X86-BMI1BMI2-NEXT:    andl %ebx, %esi
+; X86-BMI1BMI2-NEXT:    andl %ebp, %edi
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    movl %edi, %edx
+; X86-BMI1BMI2-NEXT:    addl $12, %esp
 ; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    popl %edi
+; X86-BMI1BMI2-NEXT:    popl %ebx
+; X86-BMI1BMI2-NEXT:    popl %ebp
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bextr64_c4_commutative:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %r14
+; X64-NOBMI-NEXT:    pushq %rbx
+; X64-NOBMI-NEXT:    pushq %rax
 ; X64-NOBMI-NEXT:    movq %rsi, %rcx
-; X64-NOBMI-NEXT:    movq %rdi, %rax
+; X64-NOBMI-NEXT:    movq %rdi, %r14
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    shrq %cl, %r14
 ; X64-NOBMI-NEXT:    negl %edx
+; X64-NOBMI-NEXT:    movq $-1, %rbx
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
-; X64-NOBMI-NEXT:    shlq %cl, %rax
-; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    shrq %cl, %rbx
+; X64-NOBMI-NEXT:    movq %rbx, %rdi
+; X64-NOBMI-NEXT:    callq use64
+; X64-NOBMI-NEXT:    andq %r14, %rbx
+; X64-NOBMI-NEXT:    movq %rbx, %rax
+; X64-NOBMI-NEXT:    addq $8, %rsp
+; X64-NOBMI-NEXT:    popq %rbx
+; X64-NOBMI-NEXT:    popq %r14
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1NOTBM-LABEL: bextr64_c4_commutative:
 ; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    pushq %r14
+; X64-BMI1NOTBM-NEXT:    pushq %rbx
+; X64-BMI1NOTBM-NEXT:    pushq %rax
 ; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
+; X64-BMI1NOTBM-NEXT:    movq %rdi, %r14
 ; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %rdi
-; X64-BMI1NOTBM-NEXT:    shlq $8, %rdx
-; X64-BMI1NOTBM-NEXT:    bextrq %rdx, %rdi, %rax
+; X64-BMI1NOTBM-NEXT:    shrq %cl, %r14
+; X64-BMI1NOTBM-NEXT:    negl %edx
+; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
+; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
+; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
+; X64-BMI1NOTBM-NEXT:    movq %rbx, %rdi
+; X64-BMI1NOTBM-NEXT:    callq use64
+; X64-BMI1NOTBM-NEXT:    andq %r14, %rbx
+; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
+; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
+; X64-BMI1NOTBM-NEXT:    popq %rbx
+; X64-BMI1NOTBM-NEXT:    popq %r14
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bextr64_c4_commutative:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rax
+; X64-BMI1BMI2-NEXT:    pushq %r14
+; X64-BMI1BMI2-NEXT:    pushq %rbx
+; X64-BMI1BMI2-NEXT:    pushq %rax
+; X64-BMI1BMI2-NEXT:    movq %rdx, %rbx
+; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %r14
+; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI1BMI2-NEXT:    negl %eax
+; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
+; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI1BMI2-NEXT:    callq use64
+; X64-BMI1BMI2-NEXT:    bzhiq %rbx, %r14, %rax
+; X64-BMI1BMI2-NEXT:    addq $8, %rsp
+; X64-BMI1BMI2-NEXT:    popq %rbx
+; X64-BMI1BMI2-NEXT:    popq %r14
 ; X64-BMI1BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %numhighbits = sub i64 64, %numlowbits
   %mask = lshr i64 -1, %numhighbits
+  call void @use64(i64 %mask)
   %masked = and i64 %shifted, %mask ; swapped order
   ret i64 %masked
 }
@@ -3932,37 +4707,40 @@ define i64 @bextr64_c5_skipextrauses(i64
 ; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
 ; X86-NOBMI-NEXT:    subl $12, %esp
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    movl %esi, %ebx
-; X86-NOBMI-NEXT:    movl %eax, %ecx
-; X86-NOBMI-NEXT:    shrl %cl, %ebx
-; X86-NOBMI-NEXT:    shrdl %cl, %esi, %edx
-; X86-NOBMI-NEXT:    testb $32, %al
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl %eax, %edi
+; X86-NOBMI-NEXT:    shrl %cl, %edi
+; X86-NOBMI-NEXT:    shrdl %cl, %eax, %esi
+; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    je .LBB35_2
 ; X86-NOBMI-NEXT:  # %bb.1:
-; X86-NOBMI-NEXT:    movl %ebx, %edx
-; X86-NOBMI-NEXT:    xorl %ebx, %ebx
+; X86-NOBMI-NEXT:    movl %edi, %esi
+; X86-NOBMI-NEXT:    xorl %edi, %edi
 ; X86-NOBMI-NEXT:  .LBB35_2:
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; X86-NOBMI-NEXT:    movl $64, %ecx
 ; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    movl $-1, %esi
-; X86-NOBMI-NEXT:    movl $-1, %edi
-; X86-NOBMI-NEXT:    shrl %cl, %edi
-; X86-NOBMI-NEXT:    shrdl %cl, %esi, %esi
+; X86-NOBMI-NEXT:    movl $-1, %ebx
+; X86-NOBMI-NEXT:    movl $-1, %ebp
+; X86-NOBMI-NEXT:    shrl %cl, %ebp
+; X86-NOBMI-NEXT:    shrdl %cl, %ebx, %ebx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    je .LBB35_4
 ; X86-NOBMI-NEXT:  # %bb.3:
-; X86-NOBMI-NEXT:    movl %edi, %esi
-; X86-NOBMI-NEXT:    xorl %edi, %edi
+; X86-NOBMI-NEXT:    movl %ebp, %ebx
+; X86-NOBMI-NEXT:    xorl %ebp, %ebp
 ; X86-NOBMI-NEXT:  .LBB35_4:
-; X86-NOBMI-NEXT:    andl %ebx, %edi
-; X86-NOBMI-NEXT:    andl %edx, %esi
 ; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    pushl %ebp
-; X86-NOBMI-NEXT:    pushl %eax
+; X86-NOBMI-NEXT:    pushl %ebx
+; X86-NOBMI-NEXT:    calll use64
+; X86-NOBMI-NEXT:    addl $16, %esp
+; X86-NOBMI-NEXT:    andl %ebx, %esi
+; X86-NOBMI-NEXT:    andl %ebp, %edi
+; X86-NOBMI-NEXT:    subl $8, %esp
+; X86-NOBMI-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-NOBMI-NEXT:    calll use64
 ; X86-NOBMI-NEXT:    addl $16, %esp
 ; X86-NOBMI-NEXT:    movl %esi, %eax
@@ -3981,37 +4759,40 @@ define i64 @bextr64_c5_skipextrauses(i64
 ; X86-BMI1NOTBM-NEXT:    pushl %edi
 ; X86-BMI1NOTBM-NEXT:    pushl %esi
 ; X86-BMI1NOTBM-NEXT:    subl $12, %esp
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl %esi, %ebx
-; X86-BMI1NOTBM-NEXT:    movl %eax, %ecx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %edx
-; X86-BMI1NOTBM-NEXT:    testb $32, %al
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT:    movl %eax, %edi
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
+; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    je .LBB35_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %ebx, %edx
-; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
+; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
+; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
 ; X86-BMI1NOTBM-NEXT:  .LBB35_2:
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; X86-BMI1NOTBM-NEXT:    movl $64, %ecx
 ; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
-; X86-BMI1NOTBM-NEXT:    movl $-1, %edi
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %esi
+; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
+; X86-BMI1NOTBM-NEXT:    movl $-1, %ebp
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebp
+; X86-BMI1NOTBM-NEXT:    shrdl %cl, %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    je .LBB35_4
 ; X86-BMI1NOTBM-NEXT:  # %bb.3:
-; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
-; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
+; X86-BMI1NOTBM-NEXT:    movl %ebp, %ebx
+; X86-BMI1NOTBM-NEXT:    xorl %ebp, %ebp
 ; X86-BMI1NOTBM-NEXT:  .LBB35_4:
-; X86-BMI1NOTBM-NEXT:    andl %ebx, %edi
-; X86-BMI1NOTBM-NEXT:    andl %edx, %esi
 ; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    pushl %ebp
-; X86-BMI1NOTBM-NEXT:    pushl %eax
+; X86-BMI1NOTBM-NEXT:    pushl %ebx
+; X86-BMI1NOTBM-NEXT:    calll use64
+; X86-BMI1NOTBM-NEXT:    addl $16, %esp
+; X86-BMI1NOTBM-NEXT:    andl %ebx, %esi
+; X86-BMI1NOTBM-NEXT:    andl %ebp, %edi
+; X86-BMI1NOTBM-NEXT:    subl $8, %esp
+; X86-BMI1NOTBM-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-BMI1NOTBM-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-BMI1NOTBM-NEXT:    calll use64
 ; X86-BMI1NOTBM-NEXT:    addl $16, %esp
 ; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
@@ -4030,35 +4811,38 @@ define i64 @bextr64_c5_skipextrauses(i64
 ; X86-BMI1BMI2-NEXT:    pushl %edi
 ; X86-BMI1BMI2-NEXT:    pushl %esi
 ; X86-BMI1BMI2-NEXT:    subl $12, %esp
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movl %eax, %ecx
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edx
-; X86-BMI1BMI2-NEXT:    shrxl %eax, %esi, %ebx
-; X86-BMI1BMI2-NEXT:    testb $32, %al
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %esi
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edi
+; X86-BMI1BMI2-NEXT:    testb $32, %cl
 ; X86-BMI1BMI2-NEXT:    je .LBB35_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %ebx, %edx
-; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
+; X86-BMI1BMI2-NEXT:    movl %edi, %esi
+; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
 ; X86-BMI1BMI2-NEXT:  .LBB35_2:
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; X86-BMI1BMI2-NEXT:    movl $64, %ecx
 ; X86-BMI1BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    movl $-1, %esi
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edi
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %esi
+; X86-BMI1BMI2-NEXT:    movl $-1, %ebp
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, %ebp, %ebx
+; X86-BMI1BMI2-NEXT:    shrdl %cl, %ebp, %ebp
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
 ; X86-BMI1BMI2-NEXT:    je .LBB35_4
 ; X86-BMI1BMI2-NEXT:  # %bb.3:
-; X86-BMI1BMI2-NEXT:    movl %edi, %esi
-; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
+; X86-BMI1BMI2-NEXT:    movl %ebx, %ebp
+; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1BMI2-NEXT:  .LBB35_4:
-; X86-BMI1BMI2-NEXT:    andl %ebx, %edi
-; X86-BMI1BMI2-NEXT:    andl %edx, %esi
 ; X86-BMI1BMI2-NEXT:    subl $8, %esp
+; X86-BMI1BMI2-NEXT:    pushl %ebx
 ; X86-BMI1BMI2-NEXT:    pushl %ebp
-; X86-BMI1BMI2-NEXT:    pushl %eax
+; X86-BMI1BMI2-NEXT:    calll use64
+; X86-BMI1BMI2-NEXT:    addl $16, %esp
+; X86-BMI1BMI2-NEXT:    andl %ebp, %esi
+; X86-BMI1BMI2-NEXT:    andl %ebx, %edi
+; X86-BMI1BMI2-NEXT:    subl $8, %esp
+; X86-BMI1BMI2-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-BMI1BMI2-NEXT:    pushl {{[0-9]+}}(%esp)
 ; X86-BMI1BMI2-NEXT:    calll use64
 ; X86-BMI1BMI2-NEXT:    addl $16, %esp
 ; X86-BMI1BMI2-NEXT:    movl %esi, %eax
@@ -4072,46 +4856,77 @@ define i64 @bextr64_c5_skipextrauses(i64
 ;
 ; X64-NOBMI-LABEL: bextr64_c5_skipextrauses:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %r15
+; X64-NOBMI-NEXT:    pushq %r14
 ; X64-NOBMI-NEXT:    pushq %rbx
-; X64-NOBMI-NEXT:    movq %rdi, %rbx
-; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    shrq %cl, %rbx
+; X64-NOBMI-NEXT:    movq %rsi, %r14
+; X64-NOBMI-NEXT:    movq %rdi, %r15
+; X64-NOBMI-NEXT:    movl %r14d, %ecx
+; X64-NOBMI-NEXT:    shrq %cl, %r15
 ; X64-NOBMI-NEXT:    negl %edx
+; X64-NOBMI-NEXT:    movq $-1, %rbx
 ; X64-NOBMI-NEXT:    movl %edx, %ecx
-; X64-NOBMI-NEXT:    shlq %cl, %rbx
 ; X64-NOBMI-NEXT:    shrq %cl, %rbx
-; X64-NOBMI-NEXT:    movq %rsi, %rdi
+; X64-NOBMI-NEXT:    movq %rbx, %rdi
+; X64-NOBMI-NEXT:    callq use64
+; X64-NOBMI-NEXT:    andq %r15, %rbx
+; X64-NOBMI-NEXT:    movq %r14, %rdi
 ; X64-NOBMI-NEXT:    callq use64
 ; X64-NOBMI-NEXT:    movq %rbx, %rax
 ; X64-NOBMI-NEXT:    popq %rbx
+; X64-NOBMI-NEXT:    popq %r14
+; X64-NOBMI-NEXT:    popq %r15
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1NOTBM-LABEL: bextr64_c5_skipextrauses:
 ; X64-BMI1NOTBM:       # %bb.0:
+; X64-BMI1NOTBM-NEXT:    pushq %r15
+; X64-BMI1NOTBM-NEXT:    pushq %r14
 ; X64-BMI1NOTBM-NEXT:    pushq %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
-; X64-BMI1NOTBM-NEXT:    shrq %cl, %rdi
-; X64-BMI1NOTBM-NEXT:    shlq $8, %rdx
-; X64-BMI1NOTBM-NEXT:    bextrq %rdx, %rdi, %rbx
-; X64-BMI1NOTBM-NEXT:    movq %rsi, %rdi
+; X64-BMI1NOTBM-NEXT:    movq %rsi, %r14
+; X64-BMI1NOTBM-NEXT:    movq %rdi, %r15
+; X64-BMI1NOTBM-NEXT:    movl %r14d, %ecx
+; X64-BMI1NOTBM-NEXT:    shrq %cl, %r15
+; X64-BMI1NOTBM-NEXT:    negl %edx
+; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
+; X64-BMI1NOTBM-NEXT:    movl %edx, %ecx
+; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
+; X64-BMI1NOTBM-NEXT:    movq %rbx, %rdi
+; X64-BMI1NOTBM-NEXT:    callq use64
+; X64-BMI1NOTBM-NEXT:    andq %r15, %rbx
+; X64-BMI1NOTBM-NEXT:    movq %r14, %rdi
 ; X64-BMI1NOTBM-NEXT:    callq use64
 ; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
 ; X64-BMI1NOTBM-NEXT:    popq %rbx
+; X64-BMI1NOTBM-NEXT:    popq %r14
+; X64-BMI1NOTBM-NEXT:    popq %r15
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bextr64_c5_skipextrauses:
 ; X64-BMI1BMI2:       # %bb.0:
+; X64-BMI1BMI2-NEXT:    pushq %r15
+; X64-BMI1BMI2-NEXT:    pushq %r14
 ; X64-BMI1BMI2-NEXT:    pushq %rbx
-; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %rax
-; X64-BMI1BMI2-NEXT:    bzhiq %rdx, %rax, %rbx
-; X64-BMI1BMI2-NEXT:    movq %rsi, %rdi
+; X64-BMI1BMI2-NEXT:    movq %rdx, %rbx
+; X64-BMI1BMI2-NEXT:    movq %rsi, %r14
+; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rdi, %r15
+; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI1BMI2-NEXT:    negl %eax
+; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
+; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI1BMI2-NEXT:    callq use64
+; X64-BMI1BMI2-NEXT:    bzhiq %rbx, %r15, %rbx
+; X64-BMI1BMI2-NEXT:    movq %r14, %rdi
 ; X64-BMI1BMI2-NEXT:    callq use64
 ; X64-BMI1BMI2-NEXT:    movq %rbx, %rax
 ; X64-BMI1BMI2-NEXT:    popq %rbx
+; X64-BMI1BMI2-NEXT:    popq %r14
+; X64-BMI1BMI2-NEXT:    popq %r15
 ; X64-BMI1BMI2-NEXT:    retq
   %shifted = lshr i64 %val, %numskipbits
   %numhighbits = sub i64 64, %numlowbits
   %mask = lshr i64 -1, %numhighbits
+  call void @use64(i64 %mask)
   %masked = and i64 %mask, %shifted
   call void @use64(i64 %numskipbits)
   ret i64 %masked

Modified: llvm/trunk/test/CodeGen/X86/extract-lowbits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/extract-lowbits.ll?rev=345583&r1=345582&r2=345583&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/extract-lowbits.ll (original)
+++ llvm/trunk/test/CodeGen/X86/extract-lowbits.ll Tue Oct 30 04:12:29 2018
@@ -1428,52 +1428,119 @@ define i64 @bzhi64_b4_commutative(i64 %v
 ; Pattern c. 32-bit
 ; ---------------------------------------------------------------------------- ;
 
+declare void @use32(i32)
+
 define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi32_c0:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    movl $-1, %esi
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    shrl %cl, %esi
+; X86-NOBMI-NEXT:    movl %esi, (%esp)
+; X86-NOBMI-NEXT:    calll use32
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    addl $8, %esp
+; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
 ; X86-BMI1NOTBM-LABEL: bzhi32_c0:
 ; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    subl $8, %esp
+; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
+; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
+; X86-BMI1NOTBM-NEXT:    movl %esi, (%esp)
+; X86-BMI1NOTBM-NEXT:    calll use32
+; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    addl $8, %esp
+; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1BMI2-LABEL: bzhi32_c0:
 ; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    subl $8, %esp
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    negl %eax
+; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
+; X86-BMI1BMI2-NEXT:    calll use32
+; X86-BMI1BMI2-NEXT:    bzhil %esi, {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT:    addl $8, %esp
+; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_c0:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %rbp
+; X64-NOBMI-NEXT:    pushq %rbx
+; X64-NOBMI-NEXT:    pushq %rax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    movl %edi, %ebx
 ; X64-NOBMI-NEXT:    negl %ecx
-; X64-NOBMI-NEXT:    shll %cl, %eax
+; X64-NOBMI-NEXT:    movl $-1, %ebp
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    shrl %cl, %ebp
+; X64-NOBMI-NEXT:    movl %ebp, %edi
+; X64-NOBMI-NEXT:    callq use32
+; X64-NOBMI-NEXT:    andl %ebx, %ebp
+; X64-NOBMI-NEXT:    movl %ebp, %eax
+; X64-NOBMI-NEXT:    addq $8, %rsp
+; X64-NOBMI-NEXT:    popq %rbx
+; X64-NOBMI-NEXT:    popq %rbp
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1NOTBM-LABEL: bzhi32_c0:
 ; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1NOTBM-NEXT:    pushq %rbp
+; X64-BMI1NOTBM-NEXT:    pushq %rbx
+; X64-BMI1NOTBM-NEXT:    pushq %rax
+; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
+; X64-BMI1NOTBM-NEXT:    movl %edi, %ebx
+; X64-BMI1NOTBM-NEXT:    negl %ecx
+; X64-BMI1NOTBM-NEXT:    movl $-1, %ebp
+; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebp
+; X64-BMI1NOTBM-NEXT:    movl %ebp, %edi
+; X64-BMI1NOTBM-NEXT:    callq use32
+; X64-BMI1NOTBM-NEXT:    andl %ebx, %ebp
+; X64-BMI1NOTBM-NEXT:    movl %ebp, %eax
+; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
+; X64-BMI1NOTBM-NEXT:    popq %rbx
+; X64-BMI1NOTBM-NEXT:    popq %rbp
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bzhi32_c0:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI1BMI2-NEXT:    pushq %rbp
+; X64-BMI1BMI2-NEXT:    pushq %rbx
+; X64-BMI1BMI2-NEXT:    pushq %rax
+; X64-BMI1BMI2-NEXT:    movl %esi, %ebx
+; X64-BMI1BMI2-NEXT:    movl %edi, %ebp
+; X64-BMI1BMI2-NEXT:    movl %esi, %eax
+; X64-BMI1BMI2-NEXT:    negl %eax
+; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
+; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %edi
+; X64-BMI1BMI2-NEXT:    callq use32
+; X64-BMI1BMI2-NEXT:    bzhil %ebx, %ebp, %eax
+; X64-BMI1BMI2-NEXT:    addq $8, %rsp
+; X64-BMI1BMI2-NEXT:    popq %rbx
+; X64-BMI1BMI2-NEXT:    popq %rbp
 ; X64-BMI1BMI2-NEXT:    retq
   %numhighbits = sub i32 32, %numlowbits
   %mask = lshr i32 -1, %numhighbits
+  call void @use32(i32 %mask)
   %masked = and i32 %mask, %val
   ret i32 %masked
 }
@@ -1481,50 +1548,115 @@ define i32 @bzhi32_c0(i32 %val, i32 %num
 define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi32_c1_indexzext:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    movl $-1, %esi
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    shrl %cl, %esi
+; X86-NOBMI-NEXT:    movl %esi, (%esp)
+; X86-NOBMI-NEXT:    calll use32
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    addl $8, %esp
+; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
 ; X86-BMI1NOTBM-LABEL: bzhi32_c1_indexzext:
 ; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    subl $8, %esp
+; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
+; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
+; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
+; X86-BMI1NOTBM-NEXT:    movl %esi, (%esp)
+; X86-BMI1NOTBM-NEXT:    calll use32
+; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    addl $8, %esp
+; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1BMI2-LABEL: bzhi32_c1_indexzext:
 ; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT:    pushl %ebx
+; X86-BMI1BMI2-NEXT:    subl $8, %esp
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %bl
+; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
+; X86-BMI1BMI2-NEXT:    negb %al
+; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
+; X86-BMI1BMI2-NEXT:    calll use32
+; X86-BMI1BMI2-NEXT:    bzhil %ebx, {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT:    addl $8, %esp
+; X86-BMI1BMI2-NEXT:    popl %ebx
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_c1_indexzext:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %rbp
+; X64-NOBMI-NEXT:    pushq %rbx
+; X64-NOBMI-NEXT:    pushq %rax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    movl %edi, %ebx
 ; X64-NOBMI-NEXT:    negb %cl
-; X64-NOBMI-NEXT:    shll %cl, %eax
+; X64-NOBMI-NEXT:    movl $-1, %ebp
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    shrl %cl, %ebp
+; X64-NOBMI-NEXT:    movl %ebp, %edi
+; X64-NOBMI-NEXT:    callq use32
+; X64-NOBMI-NEXT:    andl %ebx, %ebp
+; X64-NOBMI-NEXT:    movl %ebp, %eax
+; X64-NOBMI-NEXT:    addq $8, %rsp
+; X64-NOBMI-NEXT:    popq %rbx
+; X64-NOBMI-NEXT:    popq %rbp
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1NOTBM-LABEL: bzhi32_c1_indexzext:
 ; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1NOTBM-NEXT:    pushq %rbp
+; X64-BMI1NOTBM-NEXT:    pushq %rbx
+; X64-BMI1NOTBM-NEXT:    pushq %rax
+; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
+; X64-BMI1NOTBM-NEXT:    movl %edi, %ebx
+; X64-BMI1NOTBM-NEXT:    negb %cl
+; X64-BMI1NOTBM-NEXT:    movl $-1, %ebp
+; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebp
+; X64-BMI1NOTBM-NEXT:    movl %ebp, %edi
+; X64-BMI1NOTBM-NEXT:    callq use32
+; X64-BMI1NOTBM-NEXT:    andl %ebx, %ebp
+; X64-BMI1NOTBM-NEXT:    movl %ebp, %eax
+; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
+; X64-BMI1NOTBM-NEXT:    popq %rbx
+; X64-BMI1NOTBM-NEXT:    popq %rbp
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bzhi32_c1_indexzext:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI1BMI2-NEXT:    pushq %rbp
+; X64-BMI1BMI2-NEXT:    pushq %rbx
+; X64-BMI1BMI2-NEXT:    pushq %rax
+; X64-BMI1BMI2-NEXT:    movl %esi, %ebx
+; X64-BMI1BMI2-NEXT:    movl %edi, %ebp
+; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI1BMI2-NEXT:    negb %al
+; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
+; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %edi
+; X64-BMI1BMI2-NEXT:    callq use32
+; X64-BMI1BMI2-NEXT:    bzhil %ebx, %ebp, %eax
+; X64-BMI1BMI2-NEXT:    addq $8, %rsp
+; X64-BMI1BMI2-NEXT:    popq %rbx
+; X64-BMI1BMI2-NEXT:    popq %rbp
 ; X64-BMI1BMI2-NEXT:    retq
   %numhighbits = sub i8 32, %numlowbits
   %sh_prom = zext i8 %numhighbits to i32
   %mask = lshr i32 -1, %sh_prom
+  call void @use32(i32 %mask)
   %masked = and i32 %mask, %val
   ret i32 %masked
 }
@@ -1532,53 +1664,106 @@ define i32 @bzhi32_c1_indexzext(i32 %val
 define i32 @bzhi32_c2_load(i32* %w, i32 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi32_c2_load:
 ; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    movl (%eax), %eax
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    movl $-1, %edx
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    shrl %cl, %edx
+; X86-NOBMI-NEXT:    movl (%eax), %esi
+; X86-NOBMI-NEXT:    andl %edx, %esi
+; X86-NOBMI-NEXT:    movl %edx, (%esp)
+; X86-NOBMI-NEXT:    calll use32
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    addl $8, %esp
+; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
 ; X86-BMI1NOTBM-LABEL: bzhi32_c2_load:
 ; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    shll $8, %ecx
-; X86-BMI1NOTBM-NEXT:    bextrl %ecx, (%eax), %eax
+; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT:    movl $-1, %edx
+; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
+; X86-BMI1NOTBM-NEXT:    movl (%eax), %esi
+; X86-BMI1NOTBM-NEXT:    andl %edx, %esi
+; X86-BMI1NOTBM-NEXT:    movl %edx, (%esp)
+; X86-BMI1NOTBM-NEXT:    calll use32
+; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    addl $8, %esp
+; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1BMI2-LABEL: bzhi32_c2_load:
 ; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    bzhil %eax, (%ecx), %eax
+; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %esi
+; X86-BMI1BMI2-NEXT:    negl %ecx
+; X86-BMI1BMI2-NEXT:    movl $-1, %eax
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %eax
+; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
+; X86-BMI1BMI2-NEXT:    calll use32
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    addl $8, %esp
+; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_c2_load:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %rbx
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    movl (%rdi), %eax
 ; X64-NOBMI-NEXT:    negl %ecx
-; X64-NOBMI-NEXT:    shll %cl, %eax
+; X64-NOBMI-NEXT:    movl $-1, %eax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    movl (%rdi), %ebx
+; X64-NOBMI-NEXT:    andl %eax, %ebx
+; X64-NOBMI-NEXT:    movl %eax, %edi
+; X64-NOBMI-NEXT:    callq use32
+; X64-NOBMI-NEXT:    movl %ebx, %eax
+; X64-NOBMI-NEXT:    popq %rbx
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1NOTBM-LABEL: bzhi32_c2_load:
 ; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, (%rdi), %eax
+; X64-BMI1NOTBM-NEXT:    pushq %rbx
+; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
+; X64-BMI1NOTBM-NEXT:    negl %ecx
+; X64-BMI1NOTBM-NEXT:    movl $-1, %eax
+; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1NOTBM-NEXT:    shrl %cl, %eax
+; X64-BMI1NOTBM-NEXT:    movl (%rdi), %ebx
+; X64-BMI1NOTBM-NEXT:    andl %eax, %ebx
+; X64-BMI1NOTBM-NEXT:    movl %eax, %edi
+; X64-BMI1NOTBM-NEXT:    callq use32
+; X64-BMI1NOTBM-NEXT:    movl %ebx, %eax
+; X64-BMI1NOTBM-NEXT:    popq %rbx
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bzhi32_c2_load:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %eax
+; X64-BMI1BMI2-NEXT:    pushq %rbx
+; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %ebx
+; X64-BMI1BMI2-NEXT:    negl %esi
+; X64-BMI1BMI2-NEXT:    movl $-1, %eax
+; X64-BMI1BMI2-NEXT:    shrxl %esi, %eax, %edi
+; X64-BMI1BMI2-NEXT:    callq use32
+; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI1BMI2-NEXT:    popq %rbx
 ; X64-BMI1BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %numhighbits = sub i32 32, %numlowbits
   %mask = lshr i32 -1, %numhighbits
+  call void @use32(i32 %mask)
   %masked = and i32 %mask, %val
   ret i32 %masked
 }
@@ -1586,54 +1771,109 @@ define i32 @bzhi32_c2_load(i32* %w, i32
 define i32 @bzhi32_c3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi32_c3_load_indexzext:
 ; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    movl (%eax), %eax
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    movl $-1, %edx
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    shrl %cl, %edx
+; X86-NOBMI-NEXT:    movl (%eax), %esi
+; X86-NOBMI-NEXT:    andl %edx, %esi
+; X86-NOBMI-NEXT:    movl %edx, (%esp)
+; X86-NOBMI-NEXT:    calll use32
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    addl $8, %esp
+; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
 ; X86-BMI1NOTBM-LABEL: bzhi32_c3_load_indexzext:
 ; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    subl $8, %esp
 ; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    shll $8, %ecx
-; X86-BMI1NOTBM-NEXT:    bextrl %ecx, (%eax), %eax
+; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
+; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
+; X86-BMI1NOTBM-NEXT:    movl $-1, %edx
+; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
+; X86-BMI1NOTBM-NEXT:    movl (%eax), %esi
+; X86-BMI1NOTBM-NEXT:    andl %edx, %esi
+; X86-BMI1NOTBM-NEXT:    movl %edx, (%esp)
+; X86-BMI1NOTBM-NEXT:    calll use32
+; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    addl $8, %esp
+; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1BMI2-LABEL: bzhi32_c3_load_indexzext:
 ; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    subl $8, %esp
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %eax
+; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %esi
+; X86-BMI1BMI2-NEXT:    # kill: def $cl killed $cl killed $ecx def $ecx
+; X86-BMI1BMI2-NEXT:    negb %cl
+; X86-BMI1BMI2-NEXT:    movl $-1, %eax
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %eax
+; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
+; X86-BMI1BMI2-NEXT:    calll use32
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    addl $8, %esp
+; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_c3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %rbx
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    movl (%rdi), %eax
 ; X64-NOBMI-NEXT:    negb %cl
-; X64-NOBMI-NEXT:    shll %cl, %eax
+; X64-NOBMI-NEXT:    movl $-1, %eax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    movl (%rdi), %ebx
+; X64-NOBMI-NEXT:    andl %eax, %ebx
+; X64-NOBMI-NEXT:    movl %eax, %edi
+; X64-NOBMI-NEXT:    callq use32
+; X64-NOBMI-NEXT:    movl %ebx, %eax
+; X64-NOBMI-NEXT:    popq %rbx
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1NOTBM-LABEL: bzhi32_c3_load_indexzext:
 ; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, (%rdi), %eax
+; X64-BMI1NOTBM-NEXT:    pushq %rbx
+; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
+; X64-BMI1NOTBM-NEXT:    negb %cl
+; X64-BMI1NOTBM-NEXT:    movl $-1, %eax
+; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1NOTBM-NEXT:    shrl %cl, %eax
+; X64-BMI1NOTBM-NEXT:    movl (%rdi), %ebx
+; X64-BMI1NOTBM-NEXT:    andl %eax, %ebx
+; X64-BMI1NOTBM-NEXT:    movl %eax, %edi
+; X64-BMI1NOTBM-NEXT:    callq use32
+; X64-BMI1NOTBM-NEXT:    movl %ebx, %eax
+; X64-BMI1NOTBM-NEXT:    popq %rbx
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bzhi32_c3_load_indexzext:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %eax
+; X64-BMI1BMI2-NEXT:    pushq %rbx
+; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %ebx
+; X64-BMI1BMI2-NEXT:    # kill: def $sil killed $sil killed $esi def $esi
+; X64-BMI1BMI2-NEXT:    negb %sil
+; X64-BMI1BMI2-NEXT:    movl $-1, %eax
+; X64-BMI1BMI2-NEXT:    shrxl %esi, %eax, %edi
+; X64-BMI1BMI2-NEXT:    callq use32
+; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI1BMI2-NEXT:    popq %rbx
 ; X64-BMI1BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %numhighbits = sub i8 32, %numlowbits
   %sh_prom = zext i8 %numhighbits to i32
   %mask = lshr i32 -1, %sh_prom
+  call void @use32(i32 %mask)
   %masked = and i32 %mask, %val
   ret i32 %masked
 }
@@ -1641,131 +1881,275 @@ define i32 @bzhi32_c3_load_indexzext(i32
 define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi32_c4_commutative:
 ; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    subl $8, %esp
 ; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    movl $-1, %esi
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    shrl %cl, %esi
+; X86-NOBMI-NEXT:    movl %esi, (%esp)
+; X86-NOBMI-NEXT:    calll use32
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    addl $8, %esp
+; X86-NOBMI-NEXT:    popl %esi
 ; X86-NOBMI-NEXT:    retl
 ;
 ; X86-BMI1NOTBM-LABEL: bzhi32_c4_commutative:
 ; X86-BMI1NOTBM:       # %bb.0:
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    shll $8, %eax
-; X86-BMI1NOTBM-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    subl $8, %esp
+; X86-BMI1NOTBM-NEXT:    xorl %ecx, %ecx
+; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
+; X86-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %esi
+; X86-BMI1NOTBM-NEXT:    movl %esi, (%esp)
+; X86-BMI1NOTBM-NEXT:    calll use32
+; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    addl $8, %esp
+; X86-BMI1NOTBM-NEXT:    popl %esi
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1BMI2-LABEL: bzhi32_c4_commutative:
 ; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    subl $8, %esp
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    negl %eax
+; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
+; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI1BMI2-NEXT:    movl %eax, (%esp)
+; X86-BMI1BMI2-NEXT:    calll use32
+; X86-BMI1BMI2-NEXT:    bzhil %esi, {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT:    addl $8, %esp
+; X86-BMI1BMI2-NEXT:    popl %esi
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_c4_commutative:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %rbp
+; X64-NOBMI-NEXT:    pushq %rbx
+; X64-NOBMI-NEXT:    pushq %rax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    movl %edi, %ebx
 ; X64-NOBMI-NEXT:    negl %ecx
-; X64-NOBMI-NEXT:    shll %cl, %eax
+; X64-NOBMI-NEXT:    movl $-1, %ebp
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    shrl %cl, %ebp
+; X64-NOBMI-NEXT:    movl %ebp, %edi
+; X64-NOBMI-NEXT:    callq use32
+; X64-NOBMI-NEXT:    andl %ebx, %ebp
+; X64-NOBMI-NEXT:    movl %ebp, %eax
+; X64-NOBMI-NEXT:    addq $8, %rsp
+; X64-NOBMI-NEXT:    popq %rbx
+; X64-NOBMI-NEXT:    popq %rbp
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1NOTBM-LABEL: bzhi32_c4_commutative:
 ; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shll $8, %esi
-; X64-BMI1NOTBM-NEXT:    bextrl %esi, %edi, %eax
+; X64-BMI1NOTBM-NEXT:    pushq %rbp
+; X64-BMI1NOTBM-NEXT:    pushq %rbx
+; X64-BMI1NOTBM-NEXT:    pushq %rax
+; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
+; X64-BMI1NOTBM-NEXT:    movl %edi, %ebx
+; X64-BMI1NOTBM-NEXT:    negl %ecx
+; X64-BMI1NOTBM-NEXT:    movl $-1, %ebp
+; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1NOTBM-NEXT:    shrl %cl, %ebp
+; X64-BMI1NOTBM-NEXT:    movl %ebp, %edi
+; X64-BMI1NOTBM-NEXT:    callq use32
+; X64-BMI1NOTBM-NEXT:    andl %ebx, %ebp
+; X64-BMI1NOTBM-NEXT:    movl %ebp, %eax
+; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
+; X64-BMI1NOTBM-NEXT:    popq %rbx
+; X64-BMI1NOTBM-NEXT:    popq %rbp
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bzhi32_c4_commutative:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
+; X64-BMI1BMI2-NEXT:    pushq %rbp
+; X64-BMI1BMI2-NEXT:    pushq %rbx
+; X64-BMI1BMI2-NEXT:    pushq %rax
+; X64-BMI1BMI2-NEXT:    movl %esi, %ebx
+; X64-BMI1BMI2-NEXT:    movl %edi, %ebp
+; X64-BMI1BMI2-NEXT:    movl %esi, %eax
+; X64-BMI1BMI2-NEXT:    negl %eax
+; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
+; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %edi
+; X64-BMI1BMI2-NEXT:    callq use32
+; X64-BMI1BMI2-NEXT:    bzhil %ebx, %ebp, %eax
+; X64-BMI1BMI2-NEXT:    addq $8, %rsp
+; X64-BMI1BMI2-NEXT:    popq %rbx
+; X64-BMI1BMI2-NEXT:    popq %rbp
 ; X64-BMI1BMI2-NEXT:    retq
   %numhighbits = sub i32 32, %numlowbits
   %mask = lshr i32 -1, %numhighbits
+  call void @use32(i32 %mask)
   %masked = and i32 %val, %mask ; swapped order
   ret i32 %masked
 }
 
 ; 64-bit
 
+declare void @use64(i64)
+
 define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi64_c0:
 ; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %edi
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    pushl %eax
 ; X86-NOBMI-NEXT:    movl $64, %ecx
 ; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    movl $-1, %eax
-; X86-NOBMI-NEXT:    movl $-1, %edx
-; X86-NOBMI-NEXT:    shrl %cl, %edx
-; X86-NOBMI-NEXT:    shrdl %cl, %eax, %eax
+; X86-NOBMI-NEXT:    movl $-1, %esi
+; X86-NOBMI-NEXT:    movl $-1, %edi
+; X86-NOBMI-NEXT:    shrl %cl, %edi
+; X86-NOBMI-NEXT:    shrdl %cl, %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    je .LBB25_2
 ; X86-NOBMI-NEXT:  # %bb.1:
-; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:    xorl %edx, %edx
+; X86-NOBMI-NEXT:    movl %edi, %esi
+; X86-NOBMI-NEXT:    xorl %edi, %edi
 ; X86-NOBMI-NEXT:  .LBB25_2:
-; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    subl $8, %esp
+; X86-NOBMI-NEXT:    pushl %edi
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    calll use64
+; X86-NOBMI-NEXT:    addl $16, %esp
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    movl %edi, %edx
+; X86-NOBMI-NEXT:    addl $4, %esp
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
 ; X86-BMI1NOTBM-LABEL: bzhi64_c0:
 ; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    pushl %edi
+; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    pushl %eax
 ; X86-BMI1NOTBM-NEXT:    movl $64, %ecx
 ; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    movl $-1, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %eax
+; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
+; X86-BMI1NOTBM-NEXT:    movl $-1, %edi
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
+; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    je .LBB25_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
+; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
+; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
 ; X86-BMI1NOTBM-NEXT:  .LBB25_2:
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI1NOTBM-NEXT:    subl $8, %esp
+; X86-BMI1NOTBM-NEXT:    pushl %edi
+; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    calll use64
+; X86-BMI1NOTBM-NEXT:    addl $16, %esp
+; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
+; X86-BMI1NOTBM-NEXT:    addl $4, %esp
+; X86-BMI1NOTBM-NEXT:    popl %esi
+; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1BMI2-LABEL: bzhi64_c0:
 ; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    pushl %edi
+; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    pushl %eax
 ; X86-BMI1BMI2-NEXT:    movl $64, %ecx
 ; X86-BMI1BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    movl $-1, %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edx
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
+; X86-BMI1BMI2-NEXT:    movl $-1, %esi
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edi
+; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %esi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
 ; X86-BMI1BMI2-NEXT:    je .LBB25_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edx, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI1BMI2-NEXT:    movl %edi, %esi
+; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
 ; X86-BMI1BMI2-NEXT:  .LBB25_2:
-; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI1BMI2-NEXT:    subl $8, %esp
+; X86-BMI1BMI2-NEXT:    pushl %edi
+; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    calll use64
+; X86-BMI1BMI2-NEXT:    addl $16, %esp
+; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    movl %edi, %edx
+; X86-BMI1BMI2-NEXT:    addl $4, %esp
+; X86-BMI1BMI2-NEXT:    popl %esi
+; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_c0:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %r14
+; X64-NOBMI-NEXT:    pushq %rbx
+; X64-NOBMI-NEXT:    pushq %rax
 ; X64-NOBMI-NEXT:    movq %rsi, %rcx
-; X64-NOBMI-NEXT:    movq %rdi, %rax
+; X64-NOBMI-NEXT:    movq %rdi, %r14
 ; X64-NOBMI-NEXT:    negl %ecx
-; X64-NOBMI-NEXT:    shlq %cl, %rax
+; X64-NOBMI-NEXT:    movq $-1, %rbx
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    shrq %cl, %rbx
+; X64-NOBMI-NEXT:    movq %rbx, %rdi
+; X64-NOBMI-NEXT:    callq use64
+; X64-NOBMI-NEXT:    andq %r14, %rbx
+; X64-NOBMI-NEXT:    movq %rbx, %rax
+; X64-NOBMI-NEXT:    addq $8, %rsp
+; X64-NOBMI-NEXT:    popq %rbx
+; X64-NOBMI-NEXT:    popq %r14
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1NOTBM-LABEL: bzhi64_c0:
 ; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shlq $8, %rsi
-; X64-BMI1NOTBM-NEXT:    bextrq %rsi, %rdi, %rax
+; X64-BMI1NOTBM-NEXT:    pushq %r14
+; X64-BMI1NOTBM-NEXT:    pushq %rbx
+; X64-BMI1NOTBM-NEXT:    pushq %rax
+; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
+; X64-BMI1NOTBM-NEXT:    movq %rdi, %r14
+; X64-BMI1NOTBM-NEXT:    negl %ecx
+; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
+; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
+; X64-BMI1NOTBM-NEXT:    movq %rbx, %rdi
+; X64-BMI1NOTBM-NEXT:    callq use64
+; X64-BMI1NOTBM-NEXT:    andq %r14, %rbx
+; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
+; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
+; X64-BMI1NOTBM-NEXT:    popq %rbx
+; X64-BMI1NOTBM-NEXT:    popq %r14
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bzhi64_c0:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
+; X64-BMI1BMI2-NEXT:    pushq %r14
+; X64-BMI1BMI2-NEXT:    pushq %rbx
+; X64-BMI1BMI2-NEXT:    pushq %rax
+; X64-BMI1BMI2-NEXT:    movq %rsi, %rbx
+; X64-BMI1BMI2-NEXT:    movq %rdi, %r14
+; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI1BMI2-NEXT:    negl %eax
+; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
+; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI1BMI2-NEXT:    callq use64
+; X64-BMI1BMI2-NEXT:    bzhiq %rbx, %r14, %rax
+; X64-BMI1BMI2-NEXT:    addq $8, %rsp
+; X64-BMI1BMI2-NEXT:    popq %rbx
+; X64-BMI1BMI2-NEXT:    popq %r14
 ; X64-BMI1BMI2-NEXT:    retq
   %numhighbits = sub i64 64, %numlowbits
   %mask = lshr i64 -1, %numhighbits
+  call void @use64(i64 %mask)
   %masked = and i64 %mask, %val
   ret i64 %masked
 }
@@ -1773,82 +2157,157 @@ define i64 @bzhi64_c0(i64 %val, i64 %num
 define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi64_c1_indexzext:
 ; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %edi
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    pushl %eax
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT:    movl $-1, %eax
-; X86-NOBMI-NEXT:    movl $-1, %edx
-; X86-NOBMI-NEXT:    shrl %cl, %edx
-; X86-NOBMI-NEXT:    shrdl %cl, %eax, %eax
+; X86-NOBMI-NEXT:    movl $-1, %esi
+; X86-NOBMI-NEXT:    movl $-1, %edi
+; X86-NOBMI-NEXT:    shrl %cl, %edi
+; X86-NOBMI-NEXT:    shrdl %cl, %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    je .LBB26_2
 ; X86-NOBMI-NEXT:  # %bb.1:
-; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:    xorl %edx, %edx
+; X86-NOBMI-NEXT:    movl %edi, %esi
+; X86-NOBMI-NEXT:    xorl %edi, %edi
 ; X86-NOBMI-NEXT:  .LBB26_2:
-; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    subl $8, %esp
+; X86-NOBMI-NEXT:    pushl %edi
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    calll use64
+; X86-NOBMI-NEXT:    addl $16, %esp
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    movl %edi, %edx
+; X86-NOBMI-NEXT:    addl $4, %esp
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
 ; X86-BMI1NOTBM-LABEL: bzhi64_c1_indexzext:
 ; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    pushl %edi
+; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    pushl %eax
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    movl $-1, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %eax
+; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
+; X86-BMI1NOTBM-NEXT:    movl $-1, %edi
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
+; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    je .LBB26_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
+; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
+; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
 ; X86-BMI1NOTBM-NEXT:  .LBB26_2:
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI1NOTBM-NEXT:    subl $8, %esp
+; X86-BMI1NOTBM-NEXT:    pushl %edi
+; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    calll use64
+; X86-BMI1NOTBM-NEXT:    addl $16, %esp
+; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
+; X86-BMI1NOTBM-NEXT:    addl $4, %esp
+; X86-BMI1NOTBM-NEXT:    popl %esi
+; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1BMI2-LABEL: bzhi64_c1_indexzext:
 ; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    pushl %edi
+; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    pushl %eax
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl $-1, %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edx
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
+; X86-BMI1BMI2-NEXT:    movl $-1, %esi
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edi
+; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %esi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
 ; X86-BMI1BMI2-NEXT:    je .LBB26_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edx, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI1BMI2-NEXT:    movl %edi, %esi
+; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
 ; X86-BMI1BMI2-NEXT:  .LBB26_2:
-; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI1BMI2-NEXT:    subl $8, %esp
+; X86-BMI1BMI2-NEXT:    pushl %edi
+; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    calll use64
+; X86-BMI1BMI2-NEXT:    addl $16, %esp
+; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    movl %edi, %edx
+; X86-BMI1BMI2-NEXT:    addl $4, %esp
+; X86-BMI1BMI2-NEXT:    popl %esi
+; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_c1_indexzext:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %r14
+; X64-NOBMI-NEXT:    pushq %rbx
+; X64-NOBMI-NEXT:    pushq %rax
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    movq %rdi, %rax
+; X64-NOBMI-NEXT:    movq %rdi, %r14
 ; X64-NOBMI-NEXT:    negb %cl
-; X64-NOBMI-NEXT:    shlq %cl, %rax
+; X64-NOBMI-NEXT:    movq $-1, %rbx
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    shrq %cl, %rbx
+; X64-NOBMI-NEXT:    movq %rbx, %rdi
+; X64-NOBMI-NEXT:    callq use64
+; X64-NOBMI-NEXT:    andq %r14, %rbx
+; X64-NOBMI-NEXT:    movq %rbx, %rax
+; X64-NOBMI-NEXT:    addq $8, %rsp
+; X64-NOBMI-NEXT:    popq %rbx
+; X64-NOBMI-NEXT:    popq %r14
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1NOTBM-LABEL: bzhi64_c1_indexzext:
 ; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1NOTBM-NEXT:    shlq $8, %rsi
-; X64-BMI1NOTBM-NEXT:    bextrq %rsi, %rdi, %rax
+; X64-BMI1NOTBM-NEXT:    pushq %r14
+; X64-BMI1NOTBM-NEXT:    pushq %rbx
+; X64-BMI1NOTBM-NEXT:    pushq %rax
+; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
+; X64-BMI1NOTBM-NEXT:    movq %rdi, %r14
+; X64-BMI1NOTBM-NEXT:    negb %cl
+; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
+; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
+; X64-BMI1NOTBM-NEXT:    movq %rbx, %rdi
+; X64-BMI1NOTBM-NEXT:    callq use64
+; X64-BMI1NOTBM-NEXT:    andq %r14, %rbx
+; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
+; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
+; X64-BMI1NOTBM-NEXT:    popq %rbx
+; X64-BMI1NOTBM-NEXT:    popq %r14
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bzhi64_c1_indexzext:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
+; X64-BMI1BMI2-NEXT:    pushq %r14
+; X64-BMI1BMI2-NEXT:    pushq %rbx
+; X64-BMI1BMI2-NEXT:    pushq %rax
+; X64-BMI1BMI2-NEXT:    movl %esi, %ebx
+; X64-BMI1BMI2-NEXT:    movq %rdi, %r14
+; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI1BMI2-NEXT:    negb %al
+; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
+; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI1BMI2-NEXT:    callq use64
+; X64-BMI1BMI2-NEXT:    bzhiq %rbx, %r14, %rax
+; X64-BMI1BMI2-NEXT:    addq $8, %rsp
+; X64-BMI1BMI2-NEXT:    popq %rbx
+; X64-BMI1BMI2-NEXT:    popq %r14
 ; X64-BMI1BMI2-NEXT:    retq
   %numhighbits = sub i8 64, %numlowbits
   %sh_prom = zext i8 %numhighbits to i64
   %mask = lshr i64 -1, %sh_prom
+  call void @use64(i64 %mask)
   %masked = and i64 %mask, %val
   ret i64 %masked
 }
@@ -1856,89 +2315,153 @@ define i64 @bzhi64_c1_indexzext(i64 %val
 define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi64_c2_load:
 ; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %ebx
+; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl $64, %ecx
 ; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    movl $-1, %eax
 ; X86-NOBMI-NEXT:    movl $-1, %edx
-; X86-NOBMI-NEXT:    shrl %cl, %edx
-; X86-NOBMI-NEXT:    shrdl %cl, %eax, %eax
+; X86-NOBMI-NEXT:    movl $-1, %ebx
+; X86-NOBMI-NEXT:    shrl %cl, %ebx
+; X86-NOBMI-NEXT:    shrdl %cl, %edx, %edx
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    je .LBB27_2
 ; X86-NOBMI-NEXT:  # %bb.1:
-; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:    xorl %edx, %edx
+; X86-NOBMI-NEXT:    movl %ebx, %edx
+; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:  .LBB27_2:
-; X86-NOBMI-NEXT:    andl (%esi), %eax
-; X86-NOBMI-NEXT:    andl 4(%esi), %edx
+; X86-NOBMI-NEXT:    movl (%eax), %esi
+; X86-NOBMI-NEXT:    andl %edx, %esi
+; X86-NOBMI-NEXT:    movl 4(%eax), %edi
+; X86-NOBMI-NEXT:    andl %ebx, %edi
+; X86-NOBMI-NEXT:    subl $8, %esp
+; X86-NOBMI-NEXT:    pushl %ebx
+; X86-NOBMI-NEXT:    pushl %edx
+; X86-NOBMI-NEXT:    calll use64
+; X86-NOBMI-NEXT:    addl $16, %esp
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    movl %edi, %edx
 ; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    popl %edi
+; X86-NOBMI-NEXT:    popl %ebx
 ; X86-NOBMI-NEXT:    retl
 ;
 ; X86-BMI1NOTBM-LABEL: bzhi64_c2_load:
 ; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    pushl %ebx
+; X86-BMI1NOTBM-NEXT:    pushl %edi
 ; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1NOTBM-NEXT:    movl $64, %ecx
 ; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %eax
+; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
+; X86-BMI1NOTBM-NEXT:    shrdl %cl, %edx, %edx
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    je .LBB27_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
+; X86-BMI1NOTBM-NEXT:    movl %ebx, %edx
+; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:  .LBB27_2:
-; X86-BMI1NOTBM-NEXT:    andl (%esi), %eax
-; X86-BMI1NOTBM-NEXT:    andl 4(%esi), %edx
+; X86-BMI1NOTBM-NEXT:    movl (%eax), %esi
+; X86-BMI1NOTBM-NEXT:    andl %edx, %esi
+; X86-BMI1NOTBM-NEXT:    movl 4(%eax), %edi
+; X86-BMI1NOTBM-NEXT:    andl %ebx, %edi
+; X86-BMI1NOTBM-NEXT:    subl $8, %esp
+; X86-BMI1NOTBM-NEXT:    pushl %ebx
+; X86-BMI1NOTBM-NEXT:    pushl %edx
+; X86-BMI1NOTBM-NEXT:    calll use64
+; X86-BMI1NOTBM-NEXT:    addl $16, %esp
+; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
 ; X86-BMI1NOTBM-NEXT:    popl %esi
+; X86-BMI1NOTBM-NEXT:    popl %edi
+; X86-BMI1NOTBM-NEXT:    popl %ebx
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1BMI2-LABEL: bzhi64_c2_load:
 ; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    pushl %ebx
+; X86-BMI1BMI2-NEXT:    pushl %edi
 ; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    movl $64, %ecx
 ; X86-BMI1BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    movl $-1, %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edx
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
+; X86-BMI1BMI2-NEXT:    movl $-1, %edx
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, %edx, %ebx
+; X86-BMI1BMI2-NEXT:    shrdl %cl, %edx, %edx
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
 ; X86-BMI1BMI2-NEXT:    je .LBB27_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edx, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI1BMI2-NEXT:    movl %ebx, %edx
+; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1BMI2-NEXT:  .LBB27_2:
-; X86-BMI1BMI2-NEXT:    andl (%esi), %eax
-; X86-BMI1BMI2-NEXT:    andl 4(%esi), %edx
+; X86-BMI1BMI2-NEXT:    movl (%eax), %esi
+; X86-BMI1BMI2-NEXT:    andl %edx, %esi
+; X86-BMI1BMI2-NEXT:    movl 4(%eax), %edi
+; X86-BMI1BMI2-NEXT:    andl %ebx, %edi
+; X86-BMI1BMI2-NEXT:    subl $8, %esp
+; X86-BMI1BMI2-NEXT:    pushl %ebx
+; X86-BMI1BMI2-NEXT:    pushl %edx
+; X86-BMI1BMI2-NEXT:    calll use64
+; X86-BMI1BMI2-NEXT:    addl $16, %esp
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    movl %edi, %edx
 ; X86-BMI1BMI2-NEXT:    popl %esi
+; X86-BMI1BMI2-NEXT:    popl %edi
+; X86-BMI1BMI2-NEXT:    popl %ebx
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_c2_load:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %rbx
 ; X64-NOBMI-NEXT:    movq %rsi, %rcx
-; X64-NOBMI-NEXT:    movq (%rdi), %rax
 ; X64-NOBMI-NEXT:    negl %ecx
-; X64-NOBMI-NEXT:    shlq %cl, %rax
+; X64-NOBMI-NEXT:    movq $-1, %rax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    movq (%rdi), %rbx
+; X64-NOBMI-NEXT:    andq %rax, %rbx
+; X64-NOBMI-NEXT:    movq %rax, %rdi
+; X64-NOBMI-NEXT:    callq use64
+; X64-NOBMI-NEXT:    movq %rbx, %rax
+; X64-NOBMI-NEXT:    popq %rbx
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1NOTBM-LABEL: bzhi64_c2_load:
 ; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shlq $8, %rsi
-; X64-BMI1NOTBM-NEXT:    bextrq %rsi, (%rdi), %rax
+; X64-BMI1NOTBM-NEXT:    pushq %rbx
+; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
+; X64-BMI1NOTBM-NEXT:    negl %ecx
+; X64-BMI1NOTBM-NEXT:    movq $-1, %rax
+; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1NOTBM-NEXT:    shrq %cl, %rax
+; X64-BMI1NOTBM-NEXT:    movq (%rdi), %rbx
+; X64-BMI1NOTBM-NEXT:    andq %rax, %rbx
+; X64-BMI1NOTBM-NEXT:    movq %rax, %rdi
+; X64-BMI1NOTBM-NEXT:    callq use64
+; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
+; X64-BMI1NOTBM-NEXT:    popq %rbx
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bzhi64_c2_load:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
+; X64-BMI1BMI2-NEXT:    pushq %rbx
+; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rbx
+; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi killed $rsi def $rsi
+; X64-BMI1BMI2-NEXT:    negl %esi
+; X64-BMI1BMI2-NEXT:    movq $-1, %rax
+; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rax, %rdi
+; X64-BMI1BMI2-NEXT:    callq use64
+; X64-BMI1BMI2-NEXT:    movq %rbx, %rax
+; X64-BMI1BMI2-NEXT:    popq %rbx
 ; X64-BMI1BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %numhighbits = sub i64 64, %numlowbits
   %mask = lshr i64 -1, %numhighbits
+  call void @use64(i64 %mask)
   %masked = and i64 %mask, %val
   ret i64 %masked
 }
@@ -1946,92 +2469,155 @@ define i64 @bzhi64_c2_load(i64* %w, i64
 define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi64_c3_load_indexzext:
 ; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %ebx
+; X86-NOBMI-NEXT:    pushl %edi
 ; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NOBMI-NEXT:    movb $64, %cl
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    movl $-1, %eax
-; X86-NOBMI-NEXT:    movl $-1, %edx
-; X86-NOBMI-NEXT:    shrl %cl, %edx
+; X86-NOBMI-NEXT:    movl $-1, %ebx
+; X86-NOBMI-NEXT:    shrl %cl, %ebx
 ; X86-NOBMI-NEXT:    shrdl %cl, %eax, %eax
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    je .LBB28_2
 ; X86-NOBMI-NEXT:  # %bb.1:
-; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:    xorl %edx, %edx
+; X86-NOBMI-NEXT:    movl %ebx, %eax
+; X86-NOBMI-NEXT:    xorl %ebx, %ebx
 ; X86-NOBMI-NEXT:  .LBB28_2:
-; X86-NOBMI-NEXT:    andl (%esi), %eax
-; X86-NOBMI-NEXT:    andl 4(%esi), %edx
+; X86-NOBMI-NEXT:    movl (%edx), %esi
+; X86-NOBMI-NEXT:    andl %eax, %esi
+; X86-NOBMI-NEXT:    movl 4(%edx), %edi
+; X86-NOBMI-NEXT:    andl %ebx, %edi
+; X86-NOBMI-NEXT:    subl $8, %esp
+; X86-NOBMI-NEXT:    pushl %ebx
+; X86-NOBMI-NEXT:    pushl %eax
+; X86-NOBMI-NEXT:    calll use64
+; X86-NOBMI-NEXT:    addl $16, %esp
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    movl %edi, %edx
 ; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    popl %edi
+; X86-NOBMI-NEXT:    popl %ebx
 ; X86-NOBMI-NEXT:    retl
 ;
 ; X86-BMI1NOTBM-LABEL: bzhi64_c3_load_indexzext:
 ; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    pushl %ebx
+; X86-BMI1NOTBM-NEXT:    pushl %edi
 ; X86-BMI1NOTBM-NEXT:    pushl %esi
-; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1NOTBM-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI1NOTBM-NEXT:    movb $64, %cl
 ; X86-BMI1NOTBM-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    movl $-1, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
+; X86-BMI1NOTBM-NEXT:    movl $-1, %ebx
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %ebx
 ; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %eax
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    je .LBB28_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
+; X86-BMI1NOTBM-NEXT:    movl %ebx, %eax
+; X86-BMI1NOTBM-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1NOTBM-NEXT:  .LBB28_2:
-; X86-BMI1NOTBM-NEXT:    andl (%esi), %eax
-; X86-BMI1NOTBM-NEXT:    andl 4(%esi), %edx
+; X86-BMI1NOTBM-NEXT:    movl (%edx), %esi
+; X86-BMI1NOTBM-NEXT:    andl %eax, %esi
+; X86-BMI1NOTBM-NEXT:    movl 4(%edx), %edi
+; X86-BMI1NOTBM-NEXT:    andl %ebx, %edi
+; X86-BMI1NOTBM-NEXT:    subl $8, %esp
+; X86-BMI1NOTBM-NEXT:    pushl %ebx
+; X86-BMI1NOTBM-NEXT:    pushl %eax
+; X86-BMI1NOTBM-NEXT:    calll use64
+; X86-BMI1NOTBM-NEXT:    addl $16, %esp
+; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
 ; X86-BMI1NOTBM-NEXT:    popl %esi
+; X86-BMI1NOTBM-NEXT:    popl %edi
+; X86-BMI1NOTBM-NEXT:    popl %ebx
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1BMI2-LABEL: bzhi64_c3_load_indexzext:
 ; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    pushl %ebx
+; X86-BMI1BMI2-NEXT:    pushl %edi
 ; X86-BMI1BMI2-NEXT:    pushl %esi
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-BMI1BMI2-NEXT:    movb $64, %cl
 ; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI1BMI2-NEXT:    movl $-1, %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edx
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %ebx
 ; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
 ; X86-BMI1BMI2-NEXT:    je .LBB28_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edx, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI1BMI2-NEXT:    movl %ebx, %eax
+; X86-BMI1BMI2-NEXT:    xorl %ebx, %ebx
 ; X86-BMI1BMI2-NEXT:  .LBB28_2:
-; X86-BMI1BMI2-NEXT:    andl (%esi), %eax
-; X86-BMI1BMI2-NEXT:    andl 4(%esi), %edx
+; X86-BMI1BMI2-NEXT:    movl (%edx), %esi
+; X86-BMI1BMI2-NEXT:    andl %eax, %esi
+; X86-BMI1BMI2-NEXT:    movl 4(%edx), %edi
+; X86-BMI1BMI2-NEXT:    andl %ebx, %edi
+; X86-BMI1BMI2-NEXT:    subl $8, %esp
+; X86-BMI1BMI2-NEXT:    pushl %ebx
+; X86-BMI1BMI2-NEXT:    pushl %eax
+; X86-BMI1BMI2-NEXT:    calll use64
+; X86-BMI1BMI2-NEXT:    addl $16, %esp
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    movl %edi, %edx
 ; X86-BMI1BMI2-NEXT:    popl %esi
+; X86-BMI1BMI2-NEXT:    popl %edi
+; X86-BMI1BMI2-NEXT:    popl %ebx
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_c3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %rbx
 ; X64-NOBMI-NEXT:    movl %esi, %ecx
-; X64-NOBMI-NEXT:    movq (%rdi), %rax
 ; X64-NOBMI-NEXT:    negb %cl
-; X64-NOBMI-NEXT:    shlq %cl, %rax
+; X64-NOBMI-NEXT:    movq $-1, %rax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    movq (%rdi), %rbx
+; X64-NOBMI-NEXT:    andq %rax, %rbx
+; X64-NOBMI-NEXT:    movq %rax, %rdi
+; X64-NOBMI-NEXT:    callq use64
+; X64-NOBMI-NEXT:    movq %rbx, %rax
+; X64-NOBMI-NEXT:    popq %rbx
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1NOTBM-LABEL: bzhi64_c3_load_indexzext:
 ; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1NOTBM-NEXT:    shlq $8, %rsi
-; X64-BMI1NOTBM-NEXT:    bextrq %rsi, (%rdi), %rax
+; X64-BMI1NOTBM-NEXT:    pushq %rbx
+; X64-BMI1NOTBM-NEXT:    movl %esi, %ecx
+; X64-BMI1NOTBM-NEXT:    negb %cl
+; X64-BMI1NOTBM-NEXT:    movq $-1, %rax
+; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI1NOTBM-NEXT:    shrq %cl, %rax
+; X64-BMI1NOTBM-NEXT:    movq (%rdi), %rbx
+; X64-BMI1NOTBM-NEXT:    andq %rax, %rbx
+; X64-BMI1NOTBM-NEXT:    movq %rax, %rdi
+; X64-BMI1NOTBM-NEXT:    callq use64
+; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
+; X64-BMI1NOTBM-NEXT:    popq %rbx
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bzhi64_c3_load_indexzext:
 ; X64-BMI1BMI2:       # %bb.0:
+; X64-BMI1BMI2-NEXT:    pushq %rbx
 ; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
+; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rbx
+; X64-BMI1BMI2-NEXT:    # kill: def $sil killed $sil killed $rsi def $rsi
+; X64-BMI1BMI2-NEXT:    negb %sil
+; X64-BMI1BMI2-NEXT:    movq $-1, %rax
+; X64-BMI1BMI2-NEXT:    shrxq %rsi, %rax, %rdi
+; X64-BMI1BMI2-NEXT:    callq use64
+; X64-BMI1BMI2-NEXT:    movq %rbx, %rax
+; X64-BMI1BMI2-NEXT:    popq %rbx
 ; X64-BMI1BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %numhighbits = sub i8 64, %numlowbits
   %sh_prom = zext i8 %numhighbits to i64
   %mask = lshr i64 -1, %sh_prom
+  call void @use64(i64 %mask)
   %masked = and i64 %mask, %val
   ret i64 %masked
 }
@@ -2039,79 +2625,156 @@ define i64 @bzhi64_c3_load_indexzext(i64
 define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; X86-NOBMI-LABEL: bzhi64_c4_commutative:
 ; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %edi
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    pushl %eax
 ; X86-NOBMI-NEXT:    movl $64, %ecx
 ; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    movl $-1, %eax
-; X86-NOBMI-NEXT:    movl $-1, %edx
-; X86-NOBMI-NEXT:    shrl %cl, %edx
-; X86-NOBMI-NEXT:    shrdl %cl, %eax, %eax
+; X86-NOBMI-NEXT:    movl $-1, %esi
+; X86-NOBMI-NEXT:    movl $-1, %edi
+; X86-NOBMI-NEXT:    shrl %cl, %edi
+; X86-NOBMI-NEXT:    shrdl %cl, %esi, %esi
 ; X86-NOBMI-NEXT:    testb $32, %cl
 ; X86-NOBMI-NEXT:    je .LBB29_2
 ; X86-NOBMI-NEXT:  # %bb.1:
-; X86-NOBMI-NEXT:    movl %edx, %eax
-; X86-NOBMI-NEXT:    xorl %edx, %edx
+; X86-NOBMI-NEXT:    movl %edi, %esi
+; X86-NOBMI-NEXT:    xorl %edi, %edi
 ; X86-NOBMI-NEXT:  .LBB29_2:
-; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    subl $8, %esp
+; X86-NOBMI-NEXT:    pushl %edi
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    calll use64
+; X86-NOBMI-NEXT:    addl $16, %esp
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    movl %edi, %edx
+; X86-NOBMI-NEXT:    addl $4, %esp
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    popl %edi
 ; X86-NOBMI-NEXT:    retl
 ;
 ; X86-BMI1NOTBM-LABEL: bzhi64_c4_commutative:
 ; X86-BMI1NOTBM:       # %bb.0:
+; X86-BMI1NOTBM-NEXT:    pushl %edi
+; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    pushl %eax
 ; X86-BMI1NOTBM-NEXT:    movl $64, %ecx
 ; X86-BMI1NOTBM-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1NOTBM-NEXT:    movl $-1, %eax
-; X86-BMI1NOTBM-NEXT:    movl $-1, %edx
-; X86-BMI1NOTBM-NEXT:    shrl %cl, %edx
-; X86-BMI1NOTBM-NEXT:    shrdl %cl, %eax, %eax
+; X86-BMI1NOTBM-NEXT:    movl $-1, %esi
+; X86-BMI1NOTBM-NEXT:    movl $-1, %edi
+; X86-BMI1NOTBM-NEXT:    shrl %cl, %edi
+; X86-BMI1NOTBM-NEXT:    shrdl %cl, %esi, %esi
 ; X86-BMI1NOTBM-NEXT:    testb $32, %cl
 ; X86-BMI1NOTBM-NEXT:    je .LBB29_2
 ; X86-BMI1NOTBM-NEXT:  # %bb.1:
-; X86-BMI1NOTBM-NEXT:    movl %edx, %eax
-; X86-BMI1NOTBM-NEXT:    xorl %edx, %edx
+; X86-BMI1NOTBM-NEXT:    movl %edi, %esi
+; X86-BMI1NOTBM-NEXT:    xorl %edi, %edi
 ; X86-BMI1NOTBM-NEXT:  .LBB29_2:
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI1NOTBM-NEXT:    subl $8, %esp
+; X86-BMI1NOTBM-NEXT:    pushl %edi
+; X86-BMI1NOTBM-NEXT:    pushl %esi
+; X86-BMI1NOTBM-NEXT:    calll use64
+; X86-BMI1NOTBM-NEXT:    addl $16, %esp
+; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1NOTBM-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-BMI1NOTBM-NEXT:    movl %esi, %eax
+; X86-BMI1NOTBM-NEXT:    movl %edi, %edx
+; X86-BMI1NOTBM-NEXT:    addl $4, %esp
+; X86-BMI1NOTBM-NEXT:    popl %esi
+; X86-BMI1NOTBM-NEXT:    popl %edi
 ; X86-BMI1NOTBM-NEXT:    retl
 ;
 ; X86-BMI1BMI2-LABEL: bzhi64_c4_commutative:
 ; X86-BMI1BMI2:       # %bb.0:
+; X86-BMI1BMI2-NEXT:    pushl %edi
+; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    pushl %eax
 ; X86-BMI1BMI2-NEXT:    movl $64, %ecx
 ; X86-BMI1BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    movl $-1, %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edx
-; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
+; X86-BMI1BMI2-NEXT:    movl $-1, %esi
+; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %edi
+; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %esi
 ; X86-BMI1BMI2-NEXT:    testb $32, %cl
 ; X86-BMI1BMI2-NEXT:    je .LBB29_2
 ; X86-BMI1BMI2-NEXT:  # %bb.1:
-; X86-BMI1BMI2-NEXT:    movl %edx, %eax
-; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
+; X86-BMI1BMI2-NEXT:    movl %edi, %esi
+; X86-BMI1BMI2-NEXT:    xorl %edi, %edi
 ; X86-BMI1BMI2-NEXT:  .LBB29_2:
-; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI1BMI2-NEXT:    subl $8, %esp
+; X86-BMI1BMI2-NEXT:    pushl %edi
+; X86-BMI1BMI2-NEXT:    pushl %esi
+; X86-BMI1BMI2-NEXT:    calll use64
+; X86-BMI1BMI2-NEXT:    addl $16, %esp
+; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-BMI1BMI2-NEXT:    movl %esi, %eax
+; X86-BMI1BMI2-NEXT:    movl %edi, %edx
+; X86-BMI1BMI2-NEXT:    addl $4, %esp
+; X86-BMI1BMI2-NEXT:    popl %esi
+; X86-BMI1BMI2-NEXT:    popl %edi
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi64_c4_commutative:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %r14
+; X64-NOBMI-NEXT:    pushq %rbx
+; X64-NOBMI-NEXT:    pushq %rax
 ; X64-NOBMI-NEXT:    movq %rsi, %rcx
-; X64-NOBMI-NEXT:    movq %rdi, %rax
+; X64-NOBMI-NEXT:    movq %rdi, %r14
 ; X64-NOBMI-NEXT:    negl %ecx
-; X64-NOBMI-NEXT:    shlq %cl, %rax
+; X64-NOBMI-NEXT:    movq $-1, %rbx
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    shrq %cl, %rbx
+; X64-NOBMI-NEXT:    movq %rbx, %rdi
+; X64-NOBMI-NEXT:    callq use64
+; X64-NOBMI-NEXT:    andq %r14, %rbx
+; X64-NOBMI-NEXT:    movq %rbx, %rax
+; X64-NOBMI-NEXT:    addq $8, %rsp
+; X64-NOBMI-NEXT:    popq %rbx
+; X64-NOBMI-NEXT:    popq %r14
 ; X64-NOBMI-NEXT:    retq
 ;
 ; X64-BMI1NOTBM-LABEL: bzhi64_c4_commutative:
 ; X64-BMI1NOTBM:       # %bb.0:
-; X64-BMI1NOTBM-NEXT:    shlq $8, %rsi
-; X64-BMI1NOTBM-NEXT:    bextrq %rsi, %rdi, %rax
+; X64-BMI1NOTBM-NEXT:    pushq %r14
+; X64-BMI1NOTBM-NEXT:    pushq %rbx
+; X64-BMI1NOTBM-NEXT:    pushq %rax
+; X64-BMI1NOTBM-NEXT:    movq %rsi, %rcx
+; X64-BMI1NOTBM-NEXT:    movq %rdi, %r14
+; X64-BMI1NOTBM-NEXT:    negl %ecx
+; X64-BMI1NOTBM-NEXT:    movq $-1, %rbx
+; X64-BMI1NOTBM-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI1NOTBM-NEXT:    shrq %cl, %rbx
+; X64-BMI1NOTBM-NEXT:    movq %rbx, %rdi
+; X64-BMI1NOTBM-NEXT:    callq use64
+; X64-BMI1NOTBM-NEXT:    andq %r14, %rbx
+; X64-BMI1NOTBM-NEXT:    movq %rbx, %rax
+; X64-BMI1NOTBM-NEXT:    addq $8, %rsp
+; X64-BMI1NOTBM-NEXT:    popq %rbx
+; X64-BMI1NOTBM-NEXT:    popq %r14
 ; X64-BMI1NOTBM-NEXT:    retq
 ;
 ; X64-BMI1BMI2-LABEL: bzhi64_c4_commutative:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
+; X64-BMI1BMI2-NEXT:    pushq %r14
+; X64-BMI1BMI2-NEXT:    pushq %rbx
+; X64-BMI1BMI2-NEXT:    pushq %rax
+; X64-BMI1BMI2-NEXT:    movq %rsi, %rbx
+; X64-BMI1BMI2-NEXT:    movq %rdi, %r14
+; X64-BMI1BMI2-NEXT:    movl %ebx, %eax
+; X64-BMI1BMI2-NEXT:    negl %eax
+; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
+; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI1BMI2-NEXT:    callq use64
+; X64-BMI1BMI2-NEXT:    bzhiq %rbx, %r14, %rax
+; X64-BMI1BMI2-NEXT:    addq $8, %rsp
+; X64-BMI1BMI2-NEXT:    popq %rbx
+; X64-BMI1BMI2-NEXT:    popq %r14
 ; X64-BMI1BMI2-NEXT:    retq
   %numhighbits = sub i64 64, %numlowbits
   %mask = lshr i64 -1, %numhighbits
+  call void @use64(i64 %mask)
   %masked = and i64 %val, %mask ; swapped order
   ret i64 %masked
 }




More information about the llvm-commits mailing list