[llvm] 2307bd3 - [NFC][X86] Adjust multi-use tests in extract-lowbits.ll
Roman Lebedev via llvm-commits
llvm-commits@lists.llvm.org
Tue Sep 7 13:20:58 PDT 2021
Author: Roman Lebedev
Date: 2021-09-07T23:20:36+03:00
New Revision: 2307bd3caf299d197d563d3ab06596ded0ff7430
URL: https://github.com/llvm/llvm-project/commit/2307bd3caf299d197d563d3ab06596ded0ff7430
DIFF: https://github.com/llvm/llvm-project/commit/2307bd3caf299d197d563d3ab06596ded0ff7430.diff
LOG: [NFC][X86] Adjust multi-use tests in extract-lowbits.ll
Added:

Modified:
    llvm/test/CodeGen/X86/extract-lowbits.ll

Removed:
################################################################################
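In short: each of these multi-use tests used to keep the intermediate mask (or truncated value) alive by passing it to a declared @use32/@use64 helper, and keeping values live across that call forced callee-saved register shuffling and stack push/pop traffic into the checked assembly. The tests now take an extra %escape pointer argument and store the value instead, so it still has a second use but no call is emitted and the CHECK lines get much shorter. A minimal before/after sketch of the IR pattern, reconstructed from the bzhi32_c0 hunk below:

; Before: the extra use of %mask is a call, so %mask and %val must
; survive the call and end up parked in callee-saved registers.
declare void @use32(i32)

define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
  %numhighbits = sub i32 32, %numlowbits
  %mask = lshr i32 -1, %numhighbits
  call void @use32(i32 %mask)
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; After: the extra use is a plain store through the new %escape
; argument, so %mask is still multi-use but no call is needed.
define i32 @bzhi32_c0(i32 %val, i32 %numlowbits, i32* %escape) nounwind {
  %numhighbits = sub i32 32, %numlowbits
  %mask = lshr i32 -1, %numhighbits
  store i32 %mask, i32* %escape
  %masked = and i32 %mask, %val
  ret i32 %masked
}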
diff --git a/llvm/test/CodeGen/X86/extract-lowbits.ll b/llvm/test/CodeGen/X86/extract-lowbits.ll
index 177f99e7660c..40413219984b 100644
--- a/llvm/test/CodeGen/X86/extract-lowbits.ll
+++ b/llvm/test/CodeGen/X86/extract-lowbits.ll
@@ -859,105 +859,63 @@ define i32 @bzhi64_32_a1(i64 %val, i32 %numlowbits) nounwind {
; Shifting happens in 64-bit, then truncation (with extra use).
; Masking is 32-bit.
-define i32 @bzhi64_32_a1_trunc_extrause(i64 %val, i32 %numlowbits) nounwind {
+define i32 @bzhi64_32_a1_trunc_extrause(i64 %val, i32 %numlowbits, i32* %escape) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_a1_trunc_extrause:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: pushl %ebx
-; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: pushl %eax
-; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI-NEXT: movl %esi, (%esp)
-; X86-NOBMI-NEXT: calll use32@PLT
+; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT: movl %edx, (%eax)
; X86-NOBMI-NEXT: movl $1, %eax
-; X86-NOBMI-NEXT: movl %ebx, %ecx
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: decl %eax
-; X86-NOBMI-NEXT: andl %esi, %eax
-; X86-NOBMI-NEXT: addl $4, %esp
-; X86-NOBMI-NEXT: popl %esi
-; X86-NOBMI-NEXT: popl %ebx
+; X86-NOBMI-NEXT: andl %edx, %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi64_32_a1_trunc_extrause:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: pushl %ebx
-; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: pushl %eax
-; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI1-NEXT: movl %esi, (%esp)
-; X86-BMI1-NEXT: calll use32@PLT
-; X86-BMI1-NEXT: shll $8, %ebx
-; X86-BMI1-NEXT: bextrl %ebx, %esi, %eax
-; X86-BMI1-NEXT: addl $4, %esp
-; X86-BMI1-NEXT: popl %esi
-; X86-BMI1-NEXT: popl %ebx
+; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT: movl %ecx, (%edx)
+; X86-BMI1-NEXT: shll $8, %eax
+; X86-BMI1-NEXT: bextrl %eax, %ecx, %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi64_32_a1_trunc_extrause:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: pushl %ebx
-; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: pushl %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT: movl %esi, (%esp)
-; X86-BMI2-NEXT: calll use32@PLT
-; X86-BMI2-NEXT: bzhil %ebx, %esi, %eax
-; X86-BMI2-NEXT: addl $4, %esp
-; X86-BMI2-NEXT: popl %esi
-; X86-BMI2-NEXT: popl %ebx
+; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT: movl %ecx, (%edx)
+; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_32_a1_trunc_extrause:
; X64-NOBMI: # %bb.0:
-; X64-NOBMI-NEXT: pushq %rbp
-; X64-NOBMI-NEXT: pushq %rbx
-; X64-NOBMI-NEXT: pushq %rax
-; X64-NOBMI-NEXT: movl %esi, %ebp
-; X64-NOBMI-NEXT: movq %rdi, %rbx
-; X64-NOBMI-NEXT: callq use32@PLT
+; X64-NOBMI-NEXT: movl %esi, %ecx
+; X64-NOBMI-NEXT: movl %edi, (%rdx)
; X64-NOBMI-NEXT: movl $1, %eax
-; X64-NOBMI-NEXT: movl %ebp, %ecx
+; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: decl %eax
-; X64-NOBMI-NEXT: andl %ebx, %eax
-; X64-NOBMI-NEXT: addq $8, %rsp
-; X64-NOBMI-NEXT: popq %rbx
-; X64-NOBMI-NEXT: popq %rbp
+; X64-NOBMI-NEXT: andl %edi, %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi64_32_a1_trunc_extrause:
; X64-BMI1: # %bb.0:
-; X64-BMI1-NEXT: pushq %r14
-; X64-BMI1-NEXT: pushq %rbx
-; X64-BMI1-NEXT: pushq %rax
-; X64-BMI1-NEXT: movl %esi, %ebx
-; X64-BMI1-NEXT: movq %rdi, %r14
-; X64-BMI1-NEXT: callq use32@PLT
-; X64-BMI1-NEXT: shll $8, %ebx
-; X64-BMI1-NEXT: bextrl %ebx, %r14d, %eax
-; X64-BMI1-NEXT: addq $8, %rsp
-; X64-BMI1-NEXT: popq %rbx
-; X64-BMI1-NEXT: popq %r14
+; X64-BMI1-NEXT: movl %edi, (%rdx)
+; X64-BMI1-NEXT: shll $8, %esi
+; X64-BMI1-NEXT: bextrl %esi, %edi, %eax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi64_32_a1_trunc_extrause:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: pushq %rbp
-; X64-BMI2-NEXT: pushq %rbx
-; X64-BMI2-NEXT: pushq %rax
-; X64-BMI2-NEXT: movl %esi, %ebp
-; X64-BMI2-NEXT: movq %rdi, %rbx
-; X64-BMI2-NEXT: callq use32@PLT
-; X64-BMI2-NEXT: bzhil %ebp, %ebx, %eax
-; X64-BMI2-NEXT: addq $8, %rsp
-; X64-BMI2-NEXT: popq %rbx
-; X64-BMI2-NEXT: popq %rbp
+; X64-BMI2-NEXT: movl %edi, (%rdx)
+; X64-BMI2-NEXT: bzhil %esi, %edi, %eax
; X64-BMI2-NEXT: retq
%truncval = trunc i64 %val to i32
- call void @use32(i32 %truncval)
+ store i32 %truncval, i32* %escape
%onebit = shl i32 1, %numlowbits
%mask = add nsw i32 %onebit, -1
%masked = and i32 %mask, %truncval
@@ -2044,915 +2002,654 @@ define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind {
; Pattern c. 32-bit
; ---------------------------------------------------------------------------- ;
-declare void @use32(i32)
-
-define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
+define i32 @bzhi32_c0(i32 %val, i32 %numlowbits, i32* %escape) nounwind {
; X86-NOBMI-LABEL: bzhi32_c0:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: subl $8, %esp
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT: movl $-1, %esi
+; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NOBMI-NEXT: shrl %cl, %esi
-; X86-NOBMI-NEXT: movl %esi, (%esp)
-; X86-NOBMI-NEXT: calll use32@PLT
-; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI-NEXT: movl %esi, %eax
-; X86-NOBMI-NEXT: addl $8, %esp
-; X86-NOBMI-NEXT: popl %esi
+; X86-NOBMI-NEXT: shrl %cl, %eax
+; X86-NOBMI-NEXT: movl %eax, (%edx)
+; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi32_c0:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: subl $8, %esp
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: xorl %ecx, %ecx
; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1-NEXT: movl $-1, %esi
+; X86-BMI1-NEXT: movl $-1, %eax
; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-BMI1-NEXT: shrl %cl, %esi
-; X86-BMI1-NEXT: movl %esi, (%esp)
-; X86-BMI1-NEXT: calll use32@PLT
-; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-BMI1-NEXT: movl %esi, %eax
-; X86-BMI1-NEXT: addl $8, %esp
-; X86-BMI1-NEXT: popl %esi
+; X86-BMI1-NEXT: shrl %cl, %eax
+; X86-BMI1-NEXT: movl %eax, (%edx)
+; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi32_c0:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: pushl %ebx
-; X86-BMI2-NEXT: subl $8, %esp
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-BMI2-NEXT: movl %ebx, %eax
-; X86-BMI2-NEXT: negb %al
-; X86-BMI2-NEXT: movl $-1, %ecx
-; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax
-; X86-BMI2-NEXT: movl %eax, (%esp)
-; X86-BMI2-NEXT: calll use32@PLT
-; X86-BMI2-NEXT: bzhil %ebx, {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: addl $8, %esp
-; X86-BMI2-NEXT: popl %ebx
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT: bzhil %edx, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx
+; X86-BMI2-NEXT: negb %dl
+; X86-BMI2-NEXT: movl $-1, %esi
+; X86-BMI2-NEXT: shrxl %edx, %esi, %edx
+; X86-BMI2-NEXT: movl %edx, (%ecx)
+; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_c0:
; X64-NOBMI: # %bb.0:
-; X64-NOBMI-NEXT: pushq %rbp
-; X64-NOBMI-NEXT: pushq %rbx
-; X64-NOBMI-NEXT: pushq %rax
; X64-NOBMI-NEXT: movl %esi, %ecx
-; X64-NOBMI-NEXT: movl %edi, %ebx
; X64-NOBMI-NEXT: negb %cl
-; X64-NOBMI-NEXT: movl $-1, %ebp
+; X64-NOBMI-NEXT: movl $-1, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT: shrl %cl, %ebp
-; X64-NOBMI-NEXT: movl %ebp, %edi
-; X64-NOBMI-NEXT: callq use32@PLT
-; X64-NOBMI-NEXT: andl %ebx, %ebp
-; X64-NOBMI-NEXT: movl %ebp, %eax
-; X64-NOBMI-NEXT: addq $8, %rsp
-; X64-NOBMI-NEXT: popq %rbx
-; X64-NOBMI-NEXT: popq %rbp
+; X64-NOBMI-NEXT: shrl %cl, %eax
+; X64-NOBMI-NEXT: movl %eax, (%rdx)
+; X64-NOBMI-NEXT: andl %edi, %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi32_c0:
; X64-BMI1: # %bb.0:
-; X64-BMI1-NEXT: pushq %rbp
-; X64-BMI1-NEXT: pushq %rbx
-; X64-BMI1-NEXT: pushq %rax
; X64-BMI1-NEXT: movl %esi, %ecx
-; X64-BMI1-NEXT: movl %edi, %ebx
; X64-BMI1-NEXT: negb %cl
-; X64-BMI1-NEXT: movl $-1, %ebp
+; X64-BMI1-NEXT: movl $-1, %eax
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-BMI1-NEXT: shrl %cl, %ebp
-; X64-BMI1-NEXT: movl %ebp, %edi
-; X64-BMI1-NEXT: callq use32@PLT
-; X64-BMI1-NEXT: andl %ebx, %ebp
-; X64-BMI1-NEXT: movl %ebp, %eax
-; X64-BMI1-NEXT: addq $8, %rsp
-; X64-BMI1-NEXT: popq %rbx
-; X64-BMI1-NEXT: popq %rbp
+; X64-BMI1-NEXT: shrl %cl, %eax
+; X64-BMI1-NEXT: movl %eax, (%rdx)
+; X64-BMI1-NEXT: andl %edi, %eax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi32_c0:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: pushq %rbp
-; X64-BMI2-NEXT: pushq %rbx
-; X64-BMI2-NEXT: pushq %rax
-; X64-BMI2-NEXT: movl %esi, %ebx
-; X64-BMI2-NEXT: movl %edi, %ebp
-; X64-BMI2-NEXT: movl %ebx, %eax
-; X64-BMI2-NEXT: negb %al
+; X64-BMI2-NEXT: bzhil %esi, %edi, %eax
+; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi
+; X64-BMI2-NEXT: negb %sil
; X64-BMI2-NEXT: movl $-1, %ecx
-; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi
-; X64-BMI2-NEXT: callq use32@PLT
-; X64-BMI2-NEXT: bzhil %ebx, %ebp, %eax
-; X64-BMI2-NEXT: addq $8, %rsp
-; X64-BMI2-NEXT: popq %rbx
-; X64-BMI2-NEXT: popq %rbp
+; X64-BMI2-NEXT: shrxl %esi, %ecx, %ecx
+; X64-BMI2-NEXT: movl %ecx, (%rdx)
; X64-BMI2-NEXT: retq
%numhighbits = sub i32 32, %numlowbits
%mask = lshr i32 -1, %numhighbits
- call void @use32(i32 %mask)
+ store i32 %mask, i32* %escape
%masked = and i32 %mask, %val
ret i32 %masked
}
-define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
+define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, i32* %escape) nounwind {
; X86-NOBMI-LABEL: bzhi32_c1_indexzext:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: subl $8, %esp
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT: movl $-1, %esi
+; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NOBMI-NEXT: shrl %cl, %esi
-; X86-NOBMI-NEXT: movl %esi, (%esp)
-; X86-NOBMI-NEXT: calll use32@PLT
-; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI-NEXT: movl %esi, %eax
-; X86-NOBMI-NEXT: addl $8, %esp
-; X86-NOBMI-NEXT: popl %esi
+; X86-NOBMI-NEXT: shrl %cl, %eax
+; X86-NOBMI-NEXT: movl %eax, (%edx)
+; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi32_c1_indexzext:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: subl $8, %esp
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: xorl %ecx, %ecx
; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1-NEXT: movl $-1, %esi
+; X86-BMI1-NEXT: movl $-1, %eax
; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-BMI1-NEXT: shrl %cl, %esi
-; X86-BMI1-NEXT: movl %esi, (%esp)
-; X86-BMI1-NEXT: calll use32@PLT
-; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-BMI1-NEXT: movl %esi, %eax
-; X86-BMI1-NEXT: addl $8, %esp
-; X86-BMI1-NEXT: popl %esi
+; X86-BMI1-NEXT: shrl %cl, %eax
+; X86-BMI1-NEXT: movl %eax, (%edx)
+; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi32_c1_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: pushl %ebx
-; X86-BMI2-NEXT: subl $8, %esp
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-BMI2-NEXT: movl %ebx, %eax
-; X86-BMI2-NEXT: negb %al
-; X86-BMI2-NEXT: movl $-1, %ecx
-; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax
-; X86-BMI2-NEXT: movl %eax, (%esp)
-; X86-BMI2-NEXT: calll use32@PLT
-; X86-BMI2-NEXT: bzhil %ebx, {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: addl $8, %esp
-; X86-BMI2-NEXT: popl %ebx
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT: bzhil %edx, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx
+; X86-BMI2-NEXT: negb %dl
+; X86-BMI2-NEXT: movl $-1, %esi
+; X86-BMI2-NEXT: shrxl %edx, %esi, %edx
+; X86-BMI2-NEXT: movl %edx, (%ecx)
+; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_c1_indexzext:
; X64-NOBMI: # %bb.0:
-; X64-NOBMI-NEXT: pushq %rbp
-; X64-NOBMI-NEXT: pushq %rbx
-; X64-NOBMI-NEXT: pushq %rax
; X64-NOBMI-NEXT: movl %esi, %ecx
-; X64-NOBMI-NEXT: movl %edi, %ebx
; X64-NOBMI-NEXT: negb %cl
-; X64-NOBMI-NEXT: movl $-1, %ebp
+; X64-NOBMI-NEXT: movl $-1, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT: shrl %cl, %ebp
-; X64-NOBMI-NEXT: movl %ebp, %edi
-; X64-NOBMI-NEXT: callq use32@PLT
-; X64-NOBMI-NEXT: andl %ebx, %ebp
-; X64-NOBMI-NEXT: movl %ebp, %eax
-; X64-NOBMI-NEXT: addq $8, %rsp
-; X64-NOBMI-NEXT: popq %rbx
-; X64-NOBMI-NEXT: popq %rbp
+; X64-NOBMI-NEXT: shrl %cl, %eax
+; X64-NOBMI-NEXT: movl %eax, (%rdx)
+; X64-NOBMI-NEXT: andl %edi, %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi32_c1_indexzext:
; X64-BMI1: # %bb.0:
-; X64-BMI1-NEXT: pushq %rbp
-; X64-BMI1-NEXT: pushq %rbx
-; X64-BMI1-NEXT: pushq %rax
; X64-BMI1-NEXT: movl %esi, %ecx
-; X64-BMI1-NEXT: movl %edi, %ebx
; X64-BMI1-NEXT: negb %cl
-; X64-BMI1-NEXT: movl $-1, %ebp
+; X64-BMI1-NEXT: movl $-1, %eax
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-BMI1-NEXT: shrl %cl, %ebp
-; X64-BMI1-NEXT: movl %ebp, %edi
-; X64-BMI1-NEXT: callq use32@PLT
-; X64-BMI1-NEXT: andl %ebx, %ebp
-; X64-BMI1-NEXT: movl %ebp, %eax
-; X64-BMI1-NEXT: addq $8, %rsp
-; X64-BMI1-NEXT: popq %rbx
-; X64-BMI1-NEXT: popq %rbp
+; X64-BMI1-NEXT: shrl %cl, %eax
+; X64-BMI1-NEXT: movl %eax, (%rdx)
+; X64-BMI1-NEXT: andl %edi, %eax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi32_c1_indexzext:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: pushq %rbp
-; X64-BMI2-NEXT: pushq %rbx
-; X64-BMI2-NEXT: pushq %rax
-; X64-BMI2-NEXT: movl %esi, %ebx
-; X64-BMI2-NEXT: movl %edi, %ebp
-; X64-BMI2-NEXT: movl %ebx, %eax
-; X64-BMI2-NEXT: negb %al
+; X64-BMI2-NEXT: bzhil %esi, %edi, %eax
+; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi
+; X64-BMI2-NEXT: negb %sil
; X64-BMI2-NEXT: movl $-1, %ecx
-; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi
-; X64-BMI2-NEXT: callq use32@PLT
-; X64-BMI2-NEXT: bzhil %ebx, %ebp, %eax
-; X64-BMI2-NEXT: addq $8, %rsp
-; X64-BMI2-NEXT: popq %rbx
-; X64-BMI2-NEXT: popq %rbp
+; X64-BMI2-NEXT: shrxl %esi, %ecx, %ecx
+; X64-BMI2-NEXT: movl %ecx, (%rdx)
; X64-BMI2-NEXT: retq
%numhighbits = sub i8 32, %numlowbits
%sh_prom = zext i8 %numhighbits to i32
%mask = lshr i32 -1, %sh_prom
- call void @use32(i32 %mask)
+ store i32 %mask, i32* %escape
%masked = and i32 %mask, %val
ret i32 %masked
}
-define i32 @bzhi32_c2_load(i32* %w, i32 %numlowbits) nounwind {
+define i32 @bzhi32_c2_load(i32* %w, i32 %numlowbits, i32* %escape) nounwind {
; X86-NOBMI-LABEL: bzhi32_c2_load:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: subl $8, %esp
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT: movl $-1, %edx
+; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NOBMI-NEXT: shrl %cl, %edx
-; X86-NOBMI-NEXT: movl (%eax), %esi
-; X86-NOBMI-NEXT: andl %edx, %esi
-; X86-NOBMI-NEXT: movl %edx, (%esp)
-; X86-NOBMI-NEXT: calll use32@PLT
-; X86-NOBMI-NEXT: movl %esi, %eax
-; X86-NOBMI-NEXT: addl $8, %esp
+; X86-NOBMI-NEXT: shrl %cl, %esi
+; X86-NOBMI-NEXT: movl (%eax), %eax
+; X86-NOBMI-NEXT: andl %esi, %eax
+; X86-NOBMI-NEXT: movl %esi, (%edx)
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi32_c2_load:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: subl $8, %esp
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: xorl %ecx, %ecx
; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1-NEXT: movl $-1, %edx
+; X86-BMI1-NEXT: movl $-1, %esi
; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-BMI1-NEXT: shrl %cl, %edx
-; X86-BMI1-NEXT: movl (%eax), %esi
-; X86-BMI1-NEXT: andl %edx, %esi
-; X86-BMI1-NEXT: movl %edx, (%esp)
-; X86-BMI1-NEXT: calll use32@PLT
-; X86-BMI1-NEXT: movl %esi, %eax
-; X86-BMI1-NEXT: addl $8, %esp
+; X86-BMI1-NEXT: shrl %cl, %esi
+; X86-BMI1-NEXT: movl (%eax), %eax
+; X86-BMI1-NEXT: andl %esi, %eax
+; X86-BMI1-NEXT: movl %esi, (%edx)
; X86-BMI1-NEXT: popl %esi
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi32_c2_load:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: subl $8, %esp
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI2-NEXT: bzhil %ecx, (%eax), %esi
-; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx
-; X86-BMI2-NEXT: negb %cl
-; X86-BMI2-NEXT: movl $-1, %eax
-; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
-; X86-BMI2-NEXT: movl %eax, (%esp)
-; X86-BMI2-NEXT: calll use32@PLT
-; X86-BMI2-NEXT: movl %esi, %eax
-; X86-BMI2-NEXT: addl $8, %esp
+; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT: bzhil %edx, (%eax), %eax
+; X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx
+; X86-BMI2-NEXT: negb %dl
+; X86-BMI2-NEXT: movl $-1, %esi
+; X86-BMI2-NEXT: shrxl %edx, %esi, %edx
+; X86-BMI2-NEXT: movl %edx, (%ecx)
; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_c2_load:
; X64-NOBMI: # %bb.0:
-; X64-NOBMI-NEXT: pushq %rbx
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: negb %cl
-; X64-NOBMI-NEXT: movl $-1, %eax
+; X64-NOBMI-NEXT: movl $-1, %esi
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT: shrl %cl, %eax
-; X64-NOBMI-NEXT: movl (%rdi), %ebx
-; X64-NOBMI-NEXT: andl %eax, %ebx
-; X64-NOBMI-NEXT: movl %eax, %edi
-; X64-NOBMI-NEXT: callq use32@PLT
-; X64-NOBMI-NEXT: movl %ebx, %eax
-; X64-NOBMI-NEXT: popq %rbx
+; X64-NOBMI-NEXT: shrl %cl, %esi
+; X64-NOBMI-NEXT: movl (%rdi), %eax
+; X64-NOBMI-NEXT: andl %esi, %eax
+; X64-NOBMI-NEXT: movl %esi, (%rdx)
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi32_c2_load:
; X64-BMI1: # %bb.0:
-; X64-BMI1-NEXT: pushq %rbx
; X64-BMI1-NEXT: movl %esi, %ecx
; X64-BMI1-NEXT: negb %cl
-; X64-BMI1-NEXT: movl $-1, %eax
+; X64-BMI1-NEXT: movl $-1, %esi
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-BMI1-NEXT: shrl %cl, %eax
-; X64-BMI1-NEXT: movl (%rdi), %ebx
-; X64-BMI1-NEXT: andl %eax, %ebx
-; X64-BMI1-NEXT: movl %eax, %edi
-; X64-BMI1-NEXT: callq use32@PLT
-; X64-BMI1-NEXT: movl %ebx, %eax
-; X64-BMI1-NEXT: popq %rbx
+; X64-BMI1-NEXT: shrl %cl, %esi
+; X64-BMI1-NEXT: movl (%rdi), %eax
+; X64-BMI1-NEXT: andl %esi, %eax
+; X64-BMI1-NEXT: movl %esi, (%rdx)
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi32_c2_load:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: pushq %rbx
-; X64-BMI2-NEXT: bzhil %esi, (%rdi), %ebx
+; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax
; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi
; X64-BMI2-NEXT: negb %sil
-; X64-BMI2-NEXT: movl $-1, %eax
-; X64-BMI2-NEXT: shrxl %esi, %eax, %edi
-; X64-BMI2-NEXT: callq use32@PLT
-; X64-BMI2-NEXT: movl %ebx, %eax
-; X64-BMI2-NEXT: popq %rbx
+; X64-BMI2-NEXT: movl $-1, %ecx
+; X64-BMI2-NEXT: shrxl %esi, %ecx, %ecx
+; X64-BMI2-NEXT: movl %ecx, (%rdx)
; X64-BMI2-NEXT: retq
%val = load i32, i32* %w
%numhighbits = sub i32 32, %numlowbits
%mask = lshr i32 -1, %numhighbits
- call void @use32(i32 %mask)
+ store i32 %mask, i32* %escape
%masked = and i32 %mask, %val
ret i32 %masked
}
-define i32 @bzhi32_c3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
+define i32 @bzhi32_c3_load_indexzext(i32* %w, i8 %numlowbits, i32* %escape) nounwind {
; X86-NOBMI-LABEL: bzhi32_c3_load_indexzext:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: subl $8, %esp
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT: movl $-1, %edx
+; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NOBMI-NEXT: shrl %cl, %edx
-; X86-NOBMI-NEXT: movl (%eax), %esi
-; X86-NOBMI-NEXT: andl %edx, %esi
-; X86-NOBMI-NEXT: movl %edx, (%esp)
-; X86-NOBMI-NEXT: calll use32@PLT
-; X86-NOBMI-NEXT: movl %esi, %eax
-; X86-NOBMI-NEXT: addl $8, %esp
+; X86-NOBMI-NEXT: shrl %cl, %esi
+; X86-NOBMI-NEXT: movl (%eax), %eax
+; X86-NOBMI-NEXT: andl %esi, %eax
+; X86-NOBMI-NEXT: movl %esi, (%edx)
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi32_c3_load_indexzext:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: subl $8, %esp
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: xorl %ecx, %ecx
; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1-NEXT: movl $-1, %edx
+; X86-BMI1-NEXT: movl $-1, %esi
; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-BMI1-NEXT: shrl %cl, %edx
-; X86-BMI1-NEXT: movl (%eax), %esi
-; X86-BMI1-NEXT: andl %edx, %esi
-; X86-BMI1-NEXT: movl %edx, (%esp)
-; X86-BMI1-NEXT: calll use32@PLT
-; X86-BMI1-NEXT: movl %esi, %eax
-; X86-BMI1-NEXT: addl $8, %esp
+; X86-BMI1-NEXT: shrl %cl, %esi
+; X86-BMI1-NEXT: movl (%eax), %eax
+; X86-BMI1-NEXT: andl %esi, %eax
+; X86-BMI1-NEXT: movl %esi, (%edx)
; X86-BMI1-NEXT: popl %esi
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi32_c3_load_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: subl $8, %esp
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI2-NEXT: bzhil %ecx, (%eax), %esi
-; X86-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx def $ecx
-; X86-BMI2-NEXT: negb %cl
-; X86-BMI2-NEXT: movl $-1, %eax
-; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax
-; X86-BMI2-NEXT: movl %eax, (%esp)
-; X86-BMI2-NEXT: calll use32@PLT
-; X86-BMI2-NEXT: movl %esi, %eax
-; X86-BMI2-NEXT: addl $8, %esp
+; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT: bzhil %edx, (%eax), %eax
+; X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx
+; X86-BMI2-NEXT: negb %dl
+; X86-BMI2-NEXT: movl $-1, %esi
+; X86-BMI2-NEXT: shrxl %edx, %esi, %edx
+; X86-BMI2-NEXT: movl %edx, (%ecx)
; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_c3_load_indexzext:
; X64-NOBMI: # %bb.0:
-; X64-NOBMI-NEXT: pushq %rbx
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: negb %cl
-; X64-NOBMI-NEXT: movl $-1, %eax
+; X64-NOBMI-NEXT: movl $-1, %esi
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT: shrl %cl, %eax
-; X64-NOBMI-NEXT: movl (%rdi), %ebx
-; X64-NOBMI-NEXT: andl %eax, %ebx
-; X64-NOBMI-NEXT: movl %eax, %edi
-; X64-NOBMI-NEXT: callq use32@PLT
-; X64-NOBMI-NEXT: movl %ebx, %eax
-; X64-NOBMI-NEXT: popq %rbx
+; X64-NOBMI-NEXT: shrl %cl, %esi
+; X64-NOBMI-NEXT: movl (%rdi), %eax
+; X64-NOBMI-NEXT: andl %esi, %eax
+; X64-NOBMI-NEXT: movl %esi, (%rdx)
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi32_c3_load_indexzext:
; X64-BMI1: # %bb.0:
-; X64-BMI1-NEXT: pushq %rbx
; X64-BMI1-NEXT: movl %esi, %ecx
; X64-BMI1-NEXT: negb %cl
-; X64-BMI1-NEXT: movl $-1, %eax
+; X64-BMI1-NEXT: movl $-1, %esi
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-BMI1-NEXT: shrl %cl, %eax
-; X64-BMI1-NEXT: movl (%rdi), %ebx
-; X64-BMI1-NEXT: andl %eax, %ebx
-; X64-BMI1-NEXT: movl %eax, %edi
-; X64-BMI1-NEXT: callq use32@PLT
-; X64-BMI1-NEXT: movl %ebx, %eax
-; X64-BMI1-NEXT: popq %rbx
+; X64-BMI1-NEXT: shrl %cl, %esi
+; X64-BMI1-NEXT: movl (%rdi), %eax
+; X64-BMI1-NEXT: andl %esi, %eax
+; X64-BMI1-NEXT: movl %esi, (%rdx)
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi32_c3_load_indexzext:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: pushq %rbx
-; X64-BMI2-NEXT: bzhil %esi, (%rdi), %ebx
+; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax
; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi
; X64-BMI2-NEXT: negb %sil
-; X64-BMI2-NEXT: movl $-1, %eax
-; X64-BMI2-NEXT: shrxl %esi, %eax, %edi
-; X64-BMI2-NEXT: callq use32@PLT
-; X64-BMI2-NEXT: movl %ebx, %eax
-; X64-BMI2-NEXT: popq %rbx
+; X64-BMI2-NEXT: movl $-1, %ecx
+; X64-BMI2-NEXT: shrxl %esi, %ecx, %ecx
+; X64-BMI2-NEXT: movl %ecx, (%rdx)
; X64-BMI2-NEXT: retq
%val = load i32, i32* %w
%numhighbits = sub i8 32, %numlowbits
%sh_prom = zext i8 %numhighbits to i32
%mask = lshr i32 -1, %sh_prom
- call void @use32(i32 %mask)
+ store i32 %mask, i32* %escape
%masked = and i32 %mask, %val
ret i32 %masked
}
-define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
+define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, i32* %escape) nounwind {
; X86-NOBMI-LABEL: bzhi32_c4_commutative:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: subl $8, %esp
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT: movl $-1, %esi
+; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NOBMI-NEXT: shrl %cl, %esi
-; X86-NOBMI-NEXT: movl %esi, (%esp)
-; X86-NOBMI-NEXT: calll use32@PLT
-; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI-NEXT: movl %esi, %eax
-; X86-NOBMI-NEXT: addl $8, %esp
-; X86-NOBMI-NEXT: popl %esi
+; X86-NOBMI-NEXT: shrl %cl, %eax
+; X86-NOBMI-NEXT: movl %eax, (%edx)
+; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi32_c4_commutative:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: subl $8, %esp
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: xorl %ecx, %ecx
; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1-NEXT: movl $-1, %esi
+; X86-BMI1-NEXT: movl $-1, %eax
; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-BMI1-NEXT: shrl %cl, %esi
-; X86-BMI1-NEXT: movl %esi, (%esp)
-; X86-BMI1-NEXT: calll use32@PLT
-; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-BMI1-NEXT: movl %esi, %eax
-; X86-BMI1-NEXT: addl $8, %esp
-; X86-BMI1-NEXT: popl %esi
+; X86-BMI1-NEXT: shrl %cl, %eax
+; X86-BMI1-NEXT: movl %eax, (%edx)
+; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi32_c4_commutative:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: pushl %ebx
-; X86-BMI2-NEXT: subl $8, %esp
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-BMI2-NEXT: movl %ebx, %eax
-; X86-BMI2-NEXT: negb %al
-; X86-BMI2-NEXT: movl $-1, %ecx
-; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax
-; X86-BMI2-NEXT: movl %eax, (%esp)
-; X86-BMI2-NEXT: calll use32@PLT
-; X86-BMI2-NEXT: bzhil %ebx, {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: addl $8, %esp
-; X86-BMI2-NEXT: popl %ebx
+; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT: bzhil %edx, {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx
+; X86-BMI2-NEXT: negb %dl
+; X86-BMI2-NEXT: movl $-1, %esi
+; X86-BMI2-NEXT: shrxl %edx, %esi, %edx
+; X86-BMI2-NEXT: movl %edx, (%ecx)
+; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_c4_commutative:
; X64-NOBMI: # %bb.0:
-; X64-NOBMI-NEXT: pushq %rbp
-; X64-NOBMI-NEXT: pushq %rbx
-; X64-NOBMI-NEXT: pushq %rax
; X64-NOBMI-NEXT: movl %esi, %ecx
-; X64-NOBMI-NEXT: movl %edi, %ebx
; X64-NOBMI-NEXT: negb %cl
-; X64-NOBMI-NEXT: movl $-1, %ebp
+; X64-NOBMI-NEXT: movl $-1, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT: shrl %cl, %ebp
-; X64-NOBMI-NEXT: movl %ebp, %edi
-; X64-NOBMI-NEXT: callq use32@PLT
-; X64-NOBMI-NEXT: andl %ebx, %ebp
-; X64-NOBMI-NEXT: movl %ebp, %eax
-; X64-NOBMI-NEXT: addq $8, %rsp
-; X64-NOBMI-NEXT: popq %rbx
-; X64-NOBMI-NEXT: popq %rbp
+; X64-NOBMI-NEXT: shrl %cl, %eax
+; X64-NOBMI-NEXT: movl %eax, (%rdx)
+; X64-NOBMI-NEXT: andl %edi, %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi32_c4_commutative:
; X64-BMI1: # %bb.0:
-; X64-BMI1-NEXT: pushq %rbp
-; X64-BMI1-NEXT: pushq %rbx
-; X64-BMI1-NEXT: pushq %rax
; X64-BMI1-NEXT: movl %esi, %ecx
-; X64-BMI1-NEXT: movl %edi, %ebx
; X64-BMI1-NEXT: negb %cl
-; X64-BMI1-NEXT: movl $-1, %ebp
+; X64-BMI1-NEXT: movl $-1, %eax
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-BMI1-NEXT: shrl %cl, %ebp
-; X64-BMI1-NEXT: movl %ebp, %edi
-; X64-BMI1-NEXT: callq use32@PLT
-; X64-BMI1-NEXT: andl %ebx, %ebp
-; X64-BMI1-NEXT: movl %ebp, %eax
-; X64-BMI1-NEXT: addq $8, %rsp
-; X64-BMI1-NEXT: popq %rbx
-; X64-BMI1-NEXT: popq %rbp
+; X64-BMI1-NEXT: shrl %cl, %eax
+; X64-BMI1-NEXT: movl %eax, (%rdx)
+; X64-BMI1-NEXT: andl %edi, %eax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi32_c4_commutative:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: pushq %rbp
-; X64-BMI2-NEXT: pushq %rbx
-; X64-BMI2-NEXT: pushq %rax
-; X64-BMI2-NEXT: movl %esi, %ebx
-; X64-BMI2-NEXT: movl %edi, %ebp
-; X64-BMI2-NEXT: movl %ebx, %eax
-; X64-BMI2-NEXT: negb %al
+; X64-BMI2-NEXT: bzhil %esi, %edi, %eax
+; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $esi def $esi
+; X64-BMI2-NEXT: negb %sil
; X64-BMI2-NEXT: movl $-1, %ecx
-; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi
-; X64-BMI2-NEXT: callq use32@PLT
-; X64-BMI2-NEXT: bzhil %ebx, %ebp, %eax
-; X64-BMI2-NEXT: addq $8, %rsp
-; X64-BMI2-NEXT: popq %rbx
-; X64-BMI2-NEXT: popq %rbp
+; X64-BMI2-NEXT: shrxl %esi, %ecx, %ecx
+; X64-BMI2-NEXT: movl %ecx, (%rdx)
; X64-BMI2-NEXT: retq
%numhighbits = sub i32 32, %numlowbits
%mask = lshr i32 -1, %numhighbits
- call void @use32(i32 %mask)
+ store i32 %mask, i32* %escape
%masked = and i32 %val, %mask ; swapped order
ret i32 %masked
}
; 64-bit
-declare void @use64(i64)
-
-define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
+define i64 @bzhi64_c0(i64 %val, i64 %numlowbits, i64* %escape) nounwind {
; X86-NOBMI-LABEL: bzhi64_c0:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: pushl %eax
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movb $64, %cl
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT: movl $-1, %esi
-; X86-NOBMI-NEXT: movl $-1, %edi
-; X86-NOBMI-NEXT: shrl %cl, %edi
+; X86-NOBMI-NEXT: movl $-1, %eax
+; X86-NOBMI-NEXT: movl $-1, %edx
+; X86-NOBMI-NEXT: shrl %cl, %edx
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB34_2
; X86-NOBMI-NEXT: # %bb.1:
-; X86-NOBMI-NEXT: movl %edi, %esi
-; X86-NOBMI-NEXT: xorl %edi, %edi
+; X86-NOBMI-NEXT: movl %edx, %eax
+; X86-NOBMI-NEXT: xorl %edx, %edx
; X86-NOBMI-NEXT: .LBB34_2:
-; X86-NOBMI-NEXT: subl $8, %esp
-; X86-NOBMI-NEXT: pushl %edi
-; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: calll use64@PLT
-; X86-NOBMI-NEXT: addl $16, %esp
-; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edi
-; X86-NOBMI-NEXT: movl %esi, %eax
-; X86-NOBMI-NEXT: movl %edi, %edx
-; X86-NOBMI-NEXT: addl $4, %esp
+; X86-NOBMI-NEXT: movl %edx, 4(%esi)
+; X86-NOBMI-NEXT: movl %eax, (%esi)
+; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: popl %esi
-; X86-NOBMI-NEXT: popl %edi
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi64_c0:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: pushl %eax
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movb $64, %cl
; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1-NEXT: movl $-1, %esi
-; X86-BMI1-NEXT: movl $-1, %edi
-; X86-BMI1-NEXT: shrl %cl, %edi
+; X86-BMI1-NEXT: movl $-1, %eax
+; X86-BMI1-NEXT: movl $-1, %edx
+; X86-BMI1-NEXT: shrl %cl, %edx
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: je .LBB34_2
; X86-BMI1-NEXT: # %bb.1:
-; X86-BMI1-NEXT: movl %edi, %esi
-; X86-BMI1-NEXT: xorl %edi, %edi
+; X86-BMI1-NEXT: movl %edx, %eax
+; X86-BMI1-NEXT: xorl %edx, %edx
; X86-BMI1-NEXT: .LBB34_2:
-; X86-BMI1-NEXT: subl $8, %esp
-; X86-BMI1-NEXT: pushl %edi
-; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: calll use64@PLT
-; X86-BMI1-NEXT: addl $16, %esp
-; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edi
-; X86-BMI1-NEXT: movl %esi, %eax
-; X86-BMI1-NEXT: movl %edi, %edx
-; X86-BMI1-NEXT: addl $4, %esp
+; X86-BMI1-NEXT: movl %edx, 4(%esi)
+; X86-BMI1-NEXT: movl %eax, (%esi)
+; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: popl %esi
-; X86-BMI1-NEXT: popl %edi
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi64_c0:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: pushl %edi
-; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: pushl %eax
-; X86-BMI2-NEXT: movb $64, %al
-; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movl $-1, %edi
-; X86-BMI2-NEXT: shrxl %eax, %edi, %esi
-; X86-BMI2-NEXT: testb $32, %al
+; X86-BMI2-NEXT: pushl %ebx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movb $64, %bl
+; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT: movl $-1, %eax
+; X86-BMI2-NEXT: shrxl %ebx, %eax, %edx
+; X86-BMI2-NEXT: testb $32, %bl
; X86-BMI2-NEXT: je .LBB34_2
; X86-BMI2-NEXT: # %bb.1:
-; X86-BMI2-NEXT: movl %esi, %edi
-; X86-BMI2-NEXT: xorl %esi, %esi
+; X86-BMI2-NEXT: movl %edx, %eax
+; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: .LBB34_2:
-; X86-BMI2-NEXT: subl $8, %esp
-; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: pushl %edi
-; X86-BMI2-NEXT: calll use64@PLT
-; X86-BMI2-NEXT: addl $16, %esp
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT: movl %edi, %eax
-; X86-BMI2-NEXT: movl %esi, %edx
-; X86-BMI2-NEXT: addl $4, %esp
-; X86-BMI2-NEXT: popl %esi
-; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: movl %edx, 4(%ecx)
+; X86-BMI2-NEXT: movl %eax, (%ecx)
+; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT: popl %ebx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_c0:
; X64-NOBMI: # %bb.0:
-; X64-NOBMI-NEXT: pushq %r14
-; X64-NOBMI-NEXT: pushq %rbx
-; X64-NOBMI-NEXT: pushq %rax
; X64-NOBMI-NEXT: movq %rsi, %rcx
-; X64-NOBMI-NEXT: movq %rdi, %r14
; X64-NOBMI-NEXT: negb %cl
-; X64-NOBMI-NEXT: movq $-1, %rbx
+; X64-NOBMI-NEXT: movq $-1, %rax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI-NEXT: shrq %cl, %rbx
-; X64-NOBMI-NEXT: movq %rbx, %rdi
-; X64-NOBMI-NEXT: callq use64@PLT
-; X64-NOBMI-NEXT: andq %r14, %rbx
-; X64-NOBMI-NEXT: movq %rbx, %rax
-; X64-NOBMI-NEXT: addq $8, %rsp
-; X64-NOBMI-NEXT: popq %rbx
-; X64-NOBMI-NEXT: popq %r14
+; X64-NOBMI-NEXT: shrq %cl, %rax
+; X64-NOBMI-NEXT: movq %rax, (%rdx)
+; X64-NOBMI-NEXT: andq %rdi, %rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi64_c0:
; X64-BMI1: # %bb.0:
-; X64-BMI1-NEXT: pushq %r14
-; X64-BMI1-NEXT: pushq %rbx
-; X64-BMI1-NEXT: pushq %rax
; X64-BMI1-NEXT: movq %rsi, %rcx
-; X64-BMI1-NEXT: movq %rdi, %r14
; X64-BMI1-NEXT: negb %cl
-; X64-BMI1-NEXT: movq $-1, %rbx
+; X64-BMI1-NEXT: movq $-1, %rax
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-BMI1-NEXT: shrq %cl, %rbx
-; X64-BMI1-NEXT: movq %rbx, %rdi
-; X64-BMI1-NEXT: callq use64@PLT
-; X64-BMI1-NEXT: andq %r14, %rbx
-; X64-BMI1-NEXT: movq %rbx, %rax
-; X64-BMI1-NEXT: addq $8, %rsp
-; X64-BMI1-NEXT: popq %rbx
-; X64-BMI1-NEXT: popq %r14
+; X64-BMI1-NEXT: shrq %cl, %rax
+; X64-BMI1-NEXT: movq %rax, (%rdx)
+; X64-BMI1-NEXT: andq %rdi, %rax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi64_c0:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: pushq %r14
-; X64-BMI2-NEXT: pushq %rbx
-; X64-BMI2-NEXT: pushq %rax
-; X64-BMI2-NEXT: movq %rsi, %rbx
-; X64-BMI2-NEXT: movq %rdi, %r14
-; X64-BMI2-NEXT: movl %ebx, %eax
-; X64-BMI2-NEXT: negb %al
+; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax
+; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi
+; X64-BMI2-NEXT: negb %sil
; X64-BMI2-NEXT: movq $-1, %rcx
-; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi
-; X64-BMI2-NEXT: callq use64@PLT
-; X64-BMI2-NEXT: bzhiq %rbx, %r14, %rax
-; X64-BMI2-NEXT: addq $8, %rsp
-; X64-BMI2-NEXT: popq %rbx
-; X64-BMI2-NEXT: popq %r14
+; X64-BMI2-NEXT: shrxq %rsi, %rcx, %rcx
+; X64-BMI2-NEXT: movq %rcx, (%rdx)
; X64-BMI2-NEXT: retq
%numhighbits = sub i64 64, %numlowbits
%mask = lshr i64 -1, %numhighbits
- call void @use64(i64 %mask)
+ store i64 %mask, i64* %escape
%masked = and i64 %mask, %val
ret i64 %masked
}
-define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
+define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits, i64* %escape) nounwind {
; X86-NOBMI-LABEL: bzhi64_c1_indexzext:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: pushl %eax
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movb $64, %cl
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT: movl $-1, %esi
-; X86-NOBMI-NEXT: movl $-1, %edi
-; X86-NOBMI-NEXT: shrl %cl, %edi
+; X86-NOBMI-NEXT: movl $-1, %eax
+; X86-NOBMI-NEXT: movl $-1, %edx
+; X86-NOBMI-NEXT: shrl %cl, %edx
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB35_2
; X86-NOBMI-NEXT: # %bb.1:
-; X86-NOBMI-NEXT: movl %edi, %esi
-; X86-NOBMI-NEXT: xorl %edi, %edi
+; X86-NOBMI-NEXT: movl %edx, %eax
+; X86-NOBMI-NEXT: xorl %edx, %edx
; X86-NOBMI-NEXT: .LBB35_2:
-; X86-NOBMI-NEXT: subl $8, %esp
-; X86-NOBMI-NEXT: pushl %edi
-; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: calll use64@PLT
-; X86-NOBMI-NEXT: addl $16, %esp
-; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edi
-; X86-NOBMI-NEXT: movl %esi, %eax
-; X86-NOBMI-NEXT: movl %edi, %edx
-; X86-NOBMI-NEXT: addl $4, %esp
+; X86-NOBMI-NEXT: movl %edx, 4(%esi)
+; X86-NOBMI-NEXT: movl %eax, (%esi)
+; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: popl %esi
-; X86-NOBMI-NEXT: popl %edi
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi64_c1_indexzext:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: pushl %eax
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movb $64, %cl
; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1-NEXT: movl $-1, %esi
-; X86-BMI1-NEXT: movl $-1, %edi
-; X86-BMI1-NEXT: shrl %cl, %edi
+; X86-BMI1-NEXT: movl $-1, %eax
+; X86-BMI1-NEXT: movl $-1, %edx
+; X86-BMI1-NEXT: shrl %cl, %edx
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: je .LBB35_2
; X86-BMI1-NEXT: # %bb.1:
-; X86-BMI1-NEXT: movl %edi, %esi
-; X86-BMI1-NEXT: xorl %edi, %edi
+; X86-BMI1-NEXT: movl %edx, %eax
+; X86-BMI1-NEXT: xorl %edx, %edx
; X86-BMI1-NEXT: .LBB35_2:
-; X86-BMI1-NEXT: subl $8, %esp
-; X86-BMI1-NEXT: pushl %edi
-; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: calll use64@PLT
-; X86-BMI1-NEXT: addl $16, %esp
-; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edi
-; X86-BMI1-NEXT: movl %esi, %eax
-; X86-BMI1-NEXT: movl %edi, %edx
-; X86-BMI1-NEXT: addl $4, %esp
+; X86-BMI1-NEXT: movl %edx, 4(%esi)
+; X86-BMI1-NEXT: movl %eax, (%esi)
+; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: popl %esi
-; X86-BMI1-NEXT: popl %edi
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi64_c1_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: pushl %edi
-; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: pushl %eax
-; X86-BMI2-NEXT: movb $64, %al
-; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movl $-1, %edi
-; X86-BMI2-NEXT: shrxl %eax, %edi, %esi
-; X86-BMI2-NEXT: testb $32, %al
+; X86-BMI2-NEXT: pushl %ebx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movb $64, %bl
+; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT: movl $-1, %eax
+; X86-BMI2-NEXT: shrxl %ebx, %eax, %edx
+; X86-BMI2-NEXT: testb $32, %bl
; X86-BMI2-NEXT: je .LBB35_2
; X86-BMI2-NEXT: # %bb.1:
-; X86-BMI2-NEXT: movl %esi, %edi
-; X86-BMI2-NEXT: xorl %esi, %esi
+; X86-BMI2-NEXT: movl %edx, %eax
+; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: .LBB35_2:
-; X86-BMI2-NEXT: subl $8, %esp
-; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: pushl %edi
-; X86-BMI2-NEXT: calll use64@PLT
-; X86-BMI2-NEXT: addl $16, %esp
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT: movl %edi, %eax
-; X86-BMI2-NEXT: movl %esi, %edx
-; X86-BMI2-NEXT: addl $4, %esp
-; X86-BMI2-NEXT: popl %esi
-; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: movl %edx, 4(%ecx)
+; X86-BMI2-NEXT: movl %eax, (%ecx)
+; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT: popl %ebx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_c1_indexzext:
; X64-NOBMI: # %bb.0:
-; X64-NOBMI-NEXT: pushq %r14
-; X64-NOBMI-NEXT: pushq %rbx
-; X64-NOBMI-NEXT: pushq %rax
; X64-NOBMI-NEXT: movl %esi, %ecx
-; X64-NOBMI-NEXT: movq %rdi, %r14
; X64-NOBMI-NEXT: negb %cl
-; X64-NOBMI-NEXT: movq $-1, %rbx
+; X64-NOBMI-NEXT: movq $-1, %rax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT: shrq %cl, %rbx
-; X64-NOBMI-NEXT: movq %rbx, %rdi
-; X64-NOBMI-NEXT: callq use64@PLT
-; X64-NOBMI-NEXT: andq %r14, %rbx
-; X64-NOBMI-NEXT: movq %rbx, %rax
-; X64-NOBMI-NEXT: addq $8, %rsp
-; X64-NOBMI-NEXT: popq %rbx
-; X64-NOBMI-NEXT: popq %r14
+; X64-NOBMI-NEXT: shrq %cl, %rax
+; X64-NOBMI-NEXT: movq %rax, (%rdx)
+; X64-NOBMI-NEXT: andq %rdi, %rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi64_c1_indexzext:
; X64-BMI1: # %bb.0:
-; X64-BMI1-NEXT: pushq %r14
-; X64-BMI1-NEXT: pushq %rbx
-; X64-BMI1-NEXT: pushq %rax
; X64-BMI1-NEXT: movl %esi, %ecx
-; X64-BMI1-NEXT: movq %rdi, %r14
; X64-BMI1-NEXT: negb %cl
-; X64-BMI1-NEXT: movq $-1, %rbx
+; X64-BMI1-NEXT: movq $-1, %rax
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-BMI1-NEXT: shrq %cl, %rbx
-; X64-BMI1-NEXT: movq %rbx, %rdi
-; X64-BMI1-NEXT: callq use64@PLT
-; X64-BMI1-NEXT: andq %r14, %rbx
-; X64-BMI1-NEXT: movq %rbx, %rax
-; X64-BMI1-NEXT: addq $8, %rsp
-; X64-BMI1-NEXT: popq %rbx
-; X64-BMI1-NEXT: popq %r14
+; X64-BMI1-NEXT: shrq %cl, %rax
+; X64-BMI1-NEXT: movq %rax, (%rdx)
+; X64-BMI1-NEXT: andq %rdi, %rax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi64_c1_indexzext:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: pushq %r14
-; X64-BMI2-NEXT: pushq %rbx
-; X64-BMI2-NEXT: pushq %rax
-; X64-BMI2-NEXT: movl %esi, %ebx
-; X64-BMI2-NEXT: movq %rdi, %r14
-; X64-BMI2-NEXT: movl %ebx, %eax
-; X64-BMI2-NEXT: negb %al
+; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax
+; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi
+; X64-BMI2-NEXT: negb %sil
; X64-BMI2-NEXT: movq $-1, %rcx
-; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi
-; X64-BMI2-NEXT: callq use64@PLT
-; X64-BMI2-NEXT: bzhiq %rbx, %r14, %rax
-; X64-BMI2-NEXT: addq $8, %rsp
-; X64-BMI2-NEXT: popq %rbx
-; X64-BMI2-NEXT: popq %r14
+; X64-BMI2-NEXT: shrxq %rsi, %rcx, %rcx
+; X64-BMI2-NEXT: movq %rcx, (%rdx)
; X64-BMI2-NEXT: retq
%numhighbits = sub i8 64, %numlowbits
%sh_prom = zext i8 %numhighbits to i64
%mask = lshr i64 -1, %sh_prom
- call void @use64(i64 %mask)
+ store i64 %mask, i64* %escape
%masked = and i64 %mask, %val
ret i64 %masked
}
-define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind {
+define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits, i64* %escape) nounwind {
; X86-NOBMI-LABEL: bzhi64_c2_load:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movb $64, %cl
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT: movl $-1, %eax
+; X86-NOBMI-NEXT: movl $-1, %edi
; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: shrl %cl, %ebx
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB36_2
; X86-NOBMI-NEXT: # %bb.1:
-; X86-NOBMI-NEXT: movl %ebx, %eax
+; X86-NOBMI-NEXT: movl %ebx, %edi
; X86-NOBMI-NEXT: xorl %ebx, %ebx
; X86-NOBMI-NEXT: .LBB36_2:
-; X86-NOBMI-NEXT: movl 4(%edx), %esi
-; X86-NOBMI-NEXT: andl %ebx, %esi
-; X86-NOBMI-NEXT: movl (%edx), %edi
-; X86-NOBMI-NEXT: andl %eax, %edi
-; X86-NOBMI-NEXT: subl $8, %esp
-; X86-NOBMI-NEXT: pushl %ebx
-; X86-NOBMI-NEXT: pushl %eax
-; X86-NOBMI-NEXT: calll use64@PLT
-; X86-NOBMI-NEXT: addl $16, %esp
-; X86-NOBMI-NEXT: movl %edi, %eax
-; X86-NOBMI-NEXT: movl %esi, %edx
+; X86-NOBMI-NEXT: movl 4(%eax), %edx
+; X86-NOBMI-NEXT: andl %ebx, %edx
+; X86-NOBMI-NEXT: movl (%eax), %eax
+; X86-NOBMI-NEXT: andl %edi, %eax
+; X86-NOBMI-NEXT: movl %ebx, 4(%esi)
+; X86-NOBMI-NEXT: movl %edi, (%esi)
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
; X86-NOBMI-NEXT: popl %ebx
@@ -2963,29 +2660,25 @@ define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind {
; X86-BMI1-NEXT: pushl %ebx
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movb $64, %cl
; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1-NEXT: movl $-1, %eax
+; X86-BMI1-NEXT: movl $-1, %edi
; X86-BMI1-NEXT: movl $-1, %ebx
; X86-BMI1-NEXT: shrl %cl, %ebx
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: je .LBB36_2
; X86-BMI1-NEXT: # %bb.1:
-; X86-BMI1-NEXT: movl %ebx, %eax
+; X86-BMI1-NEXT: movl %ebx, %edi
; X86-BMI1-NEXT: xorl %ebx, %ebx
; X86-BMI1-NEXT: .LBB36_2:
-; X86-BMI1-NEXT: movl 4(%edx), %esi
-; X86-BMI1-NEXT: andl %ebx, %esi
-; X86-BMI1-NEXT: movl (%edx), %edi
-; X86-BMI1-NEXT: andl %eax, %edi
-; X86-BMI1-NEXT: subl $8, %esp
-; X86-BMI1-NEXT: pushl %ebx
-; X86-BMI1-NEXT: pushl %eax
-; X86-BMI1-NEXT: calll use64@PLT
-; X86-BMI1-NEXT: addl $16, %esp
-; X86-BMI1-NEXT: movl %edi, %eax
-; X86-BMI1-NEXT: movl %esi, %edx
+; X86-BMI1-NEXT: movl 4(%eax), %edx
+; X86-BMI1-NEXT: andl %ebx, %edx
+; X86-BMI1-NEXT: movl (%eax), %eax
+; X86-BMI1-NEXT: andl %edi, %eax
+; X86-BMI1-NEXT: movl %ebx, 4(%esi)
+; X86-BMI1-NEXT: movl %edi, (%esi)
; X86-BMI1-NEXT: popl %esi
; X86-BMI1-NEXT: popl %edi
; X86-BMI1-NEXT: popl %ebx
@@ -2993,117 +2686,96 @@ define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind {
;
; X86-BMI2-LABEL: bzhi64_c2_load:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb $64, %bl
-; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl
-; X86-BMI2-NEXT: movl $-1, %ecx
-; X86-BMI2-NEXT: shrxl %ebx, %ecx, %edx
-; X86-BMI2-NEXT: testb $32, %bl
+; X86-BMI2-NEXT: movb $64, %dl
+; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT: movl $-1, %esi
+; X86-BMI2-NEXT: shrxl %edx, %esi, %edi
+; X86-BMI2-NEXT: testb $32, %dl
; X86-BMI2-NEXT: je .LBB36_2
; X86-BMI2-NEXT: # %bb.1:
-; X86-BMI2-NEXT: movl %edx, %ecx
-; X86-BMI2-NEXT: xorl %edx, %edx
+; X86-BMI2-NEXT: movl %edi, %esi
+; X86-BMI2-NEXT: xorl %edi, %edi
; X86-BMI2-NEXT: .LBB36_2:
-; X86-BMI2-NEXT: movl 4(%eax), %esi
-; X86-BMI2-NEXT: andl %edx, %esi
-; X86-BMI2-NEXT: movl (%eax), %edi
-; X86-BMI2-NEXT: andl %ecx, %edi
-; X86-BMI2-NEXT: subl $8, %esp
-; X86-BMI2-NEXT: pushl %edx
-; X86-BMI2-NEXT: pushl %ecx
-; X86-BMI2-NEXT: calll use64@PLT
-; X86-BMI2-NEXT: addl $16, %esp
-; X86-BMI2-NEXT: movl %edi, %eax
-; X86-BMI2-NEXT: movl %esi, %edx
+; X86-BMI2-NEXT: movl 4(%eax), %edx
+; X86-BMI2-NEXT: andl %edi, %edx
+; X86-BMI2-NEXT: movl (%eax), %eax
+; X86-BMI2-NEXT: andl %esi, %eax
+; X86-BMI2-NEXT: movl %edi, 4(%ecx)
+; X86-BMI2-NEXT: movl %esi, (%ecx)
; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: popl %edi
-; X86-BMI2-NEXT: popl %ebx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_c2_load:
; X64-NOBMI: # %bb.0:
-; X64-NOBMI-NEXT: pushq %rbx
; X64-NOBMI-NEXT: movq %rsi, %rcx
; X64-NOBMI-NEXT: negb %cl
-; X64-NOBMI-NEXT: movq $-1, %rax
+; X64-NOBMI-NEXT: movq $-1, %rsi
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI-NEXT: shrq %cl, %rax
-; X64-NOBMI-NEXT: movq (%rdi), %rbx
-; X64-NOBMI-NEXT: andq %rax, %rbx
-; X64-NOBMI-NEXT: movq %rax, %rdi
-; X64-NOBMI-NEXT: callq use64@PLT
-; X64-NOBMI-NEXT: movq %rbx, %rax
-; X64-NOBMI-NEXT: popq %rbx
+; X64-NOBMI-NEXT: shrq %cl, %rsi
+; X64-NOBMI-NEXT: movq (%rdi), %rax
+; X64-NOBMI-NEXT: andq %rsi, %rax
+; X64-NOBMI-NEXT: movq %rsi, (%rdx)
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi64_c2_load:
; X64-BMI1: # %bb.0:
-; X64-BMI1-NEXT: pushq %rbx
; X64-BMI1-NEXT: movq %rsi, %rcx
; X64-BMI1-NEXT: negb %cl
-; X64-BMI1-NEXT: movq $-1, %rax
+; X64-BMI1-NEXT: movq $-1, %rsi
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-BMI1-NEXT: shrq %cl, %rax
-; X64-BMI1-NEXT: movq (%rdi), %rbx
-; X64-BMI1-NEXT: andq %rax, %rbx
-; X64-BMI1-NEXT: movq %rax, %rdi
-; X64-BMI1-NEXT: callq use64@PLT
-; X64-BMI1-NEXT: movq %rbx, %rax
-; X64-BMI1-NEXT: popq %rbx
+; X64-BMI1-NEXT: shrq %cl, %rsi
+; X64-BMI1-NEXT: movq (%rdi), %rax
+; X64-BMI1-NEXT: andq %rsi, %rax
+; X64-BMI1-NEXT: movq %rsi, (%rdx)
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi64_c2_load:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: pushq %rbx
-; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rbx
+; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax
; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi
; X64-BMI2-NEXT: negb %sil
-; X64-BMI2-NEXT: movq $-1, %rax
-; X64-BMI2-NEXT: shrxq %rsi, %rax, %rdi
-; X64-BMI2-NEXT: callq use64@PLT
-; X64-BMI2-NEXT: movq %rbx, %rax
-; X64-BMI2-NEXT: popq %rbx
+; X64-BMI2-NEXT: movq $-1, %rcx
+; X64-BMI2-NEXT: shrxq %rsi, %rcx, %rcx
+; X64-BMI2-NEXT: movq %rcx, (%rdx)
; X64-BMI2-NEXT: retq
%val = load i64, i64* %w
%numhighbits = sub i64 64, %numlowbits
%mask = lshr i64 -1, %numhighbits
- call void @use64(i64 %mask)
+ store i64 %mask, i64* %escape
%masked = and i64 %mask, %val
ret i64 %masked
}
-define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
+define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits, i64* %escape) nounwind {
; X86-NOBMI-LABEL: bzhi64_c3_load_indexzext:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %ebx
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movb $64, %cl
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT: movl $-1, %eax
+; X86-NOBMI-NEXT: movl $-1, %edi
; X86-NOBMI-NEXT: movl $-1, %ebx
; X86-NOBMI-NEXT: shrl %cl, %ebx
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB37_2
; X86-NOBMI-NEXT: # %bb.1:
-; X86-NOBMI-NEXT: movl %ebx, %eax
+; X86-NOBMI-NEXT: movl %ebx, %edi
; X86-NOBMI-NEXT: xorl %ebx, %ebx
; X86-NOBMI-NEXT: .LBB37_2:
-; X86-NOBMI-NEXT: movl 4(%edx), %esi
-; X86-NOBMI-NEXT: andl %ebx, %esi
-; X86-NOBMI-NEXT: movl (%edx), %edi
-; X86-NOBMI-NEXT: andl %eax, %edi
-; X86-NOBMI-NEXT: subl $8, %esp
-; X86-NOBMI-NEXT: pushl %ebx
-; X86-NOBMI-NEXT: pushl %eax
-; X86-NOBMI-NEXT: calll use64@PLT
-; X86-NOBMI-NEXT: addl $16, %esp
-; X86-NOBMI-NEXT: movl %edi, %eax
-; X86-NOBMI-NEXT: movl %esi, %edx
+; X86-NOBMI-NEXT: movl 4(%eax), %edx
+; X86-NOBMI-NEXT: andl %ebx, %edx
+; X86-NOBMI-NEXT: movl (%eax), %eax
+; X86-NOBMI-NEXT: andl %edi, %eax
+; X86-NOBMI-NEXT: movl %ebx, 4(%esi)
+; X86-NOBMI-NEXT: movl %edi, (%esi)
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
; X86-NOBMI-NEXT: popl %ebx
@@ -3114,29 +2786,25 @@ define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
; X86-BMI1-NEXT: pushl %ebx
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movb $64, %cl
; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1-NEXT: movl $-1, %eax
+; X86-BMI1-NEXT: movl $-1, %edi
; X86-BMI1-NEXT: movl $-1, %ebx
; X86-BMI1-NEXT: shrl %cl, %ebx
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: je .LBB37_2
; X86-BMI1-NEXT: # %bb.1:
-; X86-BMI1-NEXT: movl %ebx, %eax
+; X86-BMI1-NEXT: movl %ebx, %edi
; X86-BMI1-NEXT: xorl %ebx, %ebx
; X86-BMI1-NEXT: .LBB37_2:
-; X86-BMI1-NEXT: movl 4(%edx), %esi
-; X86-BMI1-NEXT: andl %ebx, %esi
-; X86-BMI1-NEXT: movl (%edx), %edi
-; X86-BMI1-NEXT: andl %eax, %edi
-; X86-BMI1-NEXT: subl $8, %esp
-; X86-BMI1-NEXT: pushl %ebx
-; X86-BMI1-NEXT: pushl %eax
-; X86-BMI1-NEXT: calll use64@PLT
-; X86-BMI1-NEXT: addl $16, %esp
-; X86-BMI1-NEXT: movl %edi, %eax
-; X86-BMI1-NEXT: movl %esi, %edx
+; X86-BMI1-NEXT: movl 4(%eax), %edx
+; X86-BMI1-NEXT: andl %ebx, %edx
+; X86-BMI1-NEXT: movl (%eax), %eax
+; X86-BMI1-NEXT: andl %edi, %eax
+; X86-BMI1-NEXT: movl %ebx, 4(%esi)
+; X86-BMI1-NEXT: movl %edi, (%esi)
; X86-BMI1-NEXT: popl %esi
; X86-BMI1-NEXT: popl %edi
; X86-BMI1-NEXT: popl %ebx
@@ -3144,240 +2812,173 @@ define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
;
; X86-BMI2-LABEL: bzhi64_c3_load_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb $64, %bl
-; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl
-; X86-BMI2-NEXT: movl $-1, %ecx
-; X86-BMI2-NEXT: shrxl %ebx, %ecx, %edx
-; X86-BMI2-NEXT: testb $32, %bl
+; X86-BMI2-NEXT: movb $64, %dl
+; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %dl
+; X86-BMI2-NEXT: movl $-1, %esi
+; X86-BMI2-NEXT: shrxl %edx, %esi, %edi
+; X86-BMI2-NEXT: testb $32, %dl
; X86-BMI2-NEXT: je .LBB37_2
; X86-BMI2-NEXT: # %bb.1:
-; X86-BMI2-NEXT: movl %edx, %ecx
-; X86-BMI2-NEXT: xorl %edx, %edx
+; X86-BMI2-NEXT: movl %edi, %esi
+; X86-BMI2-NEXT: xorl %edi, %edi
; X86-BMI2-NEXT: .LBB37_2:
-; X86-BMI2-NEXT: movl 4(%eax), %esi
-; X86-BMI2-NEXT: andl %edx, %esi
-; X86-BMI2-NEXT: movl (%eax), %edi
-; X86-BMI2-NEXT: andl %ecx, %edi
-; X86-BMI2-NEXT: subl $8, %esp
-; X86-BMI2-NEXT: pushl %edx
-; X86-BMI2-NEXT: pushl %ecx
-; X86-BMI2-NEXT: calll use64@PLT
-; X86-BMI2-NEXT: addl $16, %esp
-; X86-BMI2-NEXT: movl %edi, %eax
-; X86-BMI2-NEXT: movl %esi, %edx
+; X86-BMI2-NEXT: movl 4(%eax), %edx
+; X86-BMI2-NEXT: andl %edi, %edx
+; X86-BMI2-NEXT: movl (%eax), %eax
+; X86-BMI2-NEXT: andl %esi, %eax
+; X86-BMI2-NEXT: movl %edi, 4(%ecx)
+; X86-BMI2-NEXT: movl %esi, (%ecx)
; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: popl %edi
-; X86-BMI2-NEXT: popl %ebx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_c3_load_indexzext:
; X64-NOBMI: # %bb.0:
-; X64-NOBMI-NEXT: pushq %rbx
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: negb %cl
-; X64-NOBMI-NEXT: movq $-1, %rax
+; X64-NOBMI-NEXT: movq $-1, %rsi
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NOBMI-NEXT: shrq %cl, %rax
-; X64-NOBMI-NEXT: movq (%rdi), %rbx
-; X64-NOBMI-NEXT: andq %rax, %rbx
-; X64-NOBMI-NEXT: movq %rax, %rdi
-; X64-NOBMI-NEXT: callq use64@PLT
-; X64-NOBMI-NEXT: movq %rbx, %rax
-; X64-NOBMI-NEXT: popq %rbx
+; X64-NOBMI-NEXT: shrq %cl, %rsi
+; X64-NOBMI-NEXT: movq (%rdi), %rax
+; X64-NOBMI-NEXT: andq %rsi, %rax
+; X64-NOBMI-NEXT: movq %rsi, (%rdx)
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi64_c3_load_indexzext:
; X64-BMI1: # %bb.0:
-; X64-BMI1-NEXT: pushq %rbx
; X64-BMI1-NEXT: movl %esi, %ecx
; X64-BMI1-NEXT: negb %cl
-; X64-BMI1-NEXT: movq $-1, %rax
+; X64-BMI1-NEXT: movq $-1, %rsi
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-BMI1-NEXT: shrq %cl, %rax
-; X64-BMI1-NEXT: movq (%rdi), %rbx
-; X64-BMI1-NEXT: andq %rax, %rbx
-; X64-BMI1-NEXT: movq %rax, %rdi
-; X64-BMI1-NEXT: callq use64@PLT
-; X64-BMI1-NEXT: movq %rbx, %rax
-; X64-BMI1-NEXT: popq %rbx
+; X64-BMI1-NEXT: shrq %cl, %rsi
+; X64-BMI1-NEXT: movq (%rdi), %rax
+; X64-BMI1-NEXT: andq %rsi, %rax
+; X64-BMI1-NEXT: movq %rsi, (%rdx)
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi64_c3_load_indexzext:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: pushq %rbx
; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rbx
+; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax
; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi
; X64-BMI2-NEXT: negb %sil
-; X64-BMI2-NEXT: movq $-1, %rax
-; X64-BMI2-NEXT: shrxq %rsi, %rax, %rdi
-; X64-BMI2-NEXT: callq use64@PLT
-; X64-BMI2-NEXT: movq %rbx, %rax
-; X64-BMI2-NEXT: popq %rbx
+; X64-BMI2-NEXT: movq $-1, %rcx
+; X64-BMI2-NEXT: shrxq %rsi, %rcx, %rcx
+; X64-BMI2-NEXT: movq %rcx, (%rdx)
; X64-BMI2-NEXT: retq
%val = load i64, i64* %w
%numhighbits = sub i8 64, %numlowbits
%sh_prom = zext i8 %numhighbits to i64
%mask = lshr i64 -1, %sh_prom
- call void @use64(i64 %mask)
+ store i64 %mask, i64* %escape
%masked = and i64 %mask, %val
ret i64 %masked
}
-define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
+define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits, i64* %escape) nounwind {
; X86-NOBMI-LABEL: bzhi64_c4_commutative:
; X86-NOBMI: # %bb.0:
-; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: pushl %eax
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movb $64, %cl
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-NOBMI-NEXT: movl $-1, %esi
-; X86-NOBMI-NEXT: movl $-1, %edi
-; X86-NOBMI-NEXT: shrl %cl, %edi
+; X86-NOBMI-NEXT: movl $-1, %eax
+; X86-NOBMI-NEXT: movl $-1, %edx
+; X86-NOBMI-NEXT: shrl %cl, %edx
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: je .LBB38_2
; X86-NOBMI-NEXT: # %bb.1:
-; X86-NOBMI-NEXT: movl %edi, %esi
-; X86-NOBMI-NEXT: xorl %edi, %edi
+; X86-NOBMI-NEXT: movl %edx, %eax
+; X86-NOBMI-NEXT: xorl %edx, %edx
; X86-NOBMI-NEXT: .LBB38_2:
-; X86-NOBMI-NEXT: subl $8, %esp
-; X86-NOBMI-NEXT: pushl %edi
-; X86-NOBMI-NEXT: pushl %esi
-; X86-NOBMI-NEXT: calll use64@PLT
-; X86-NOBMI-NEXT: addl $16, %esp
-; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edi
-; X86-NOBMI-NEXT: movl %esi, %eax
-; X86-NOBMI-NEXT: movl %edi, %edx
-; X86-NOBMI-NEXT: addl $4, %esp
+; X86-NOBMI-NEXT: movl %edx, 4(%esi)
+; X86-NOBMI-NEXT: movl %eax, (%esi)
+; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: popl %esi
-; X86-NOBMI-NEXT: popl %edi
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi64_c4_commutative:
; X86-BMI1: # %bb.0:
-; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: pushl %eax
+; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movb $64, %cl
; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1-NEXT: movl $-1, %esi
-; X86-BMI1-NEXT: movl $-1, %edi
-; X86-BMI1-NEXT: shrl %cl, %edi
+; X86-BMI1-NEXT: movl $-1, %eax
+; X86-BMI1-NEXT: movl $-1, %edx
+; X86-BMI1-NEXT: shrl %cl, %edx
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: je .LBB38_2
; X86-BMI1-NEXT: # %bb.1:
-; X86-BMI1-NEXT: movl %edi, %esi
-; X86-BMI1-NEXT: xorl %edi, %edi
+; X86-BMI1-NEXT: movl %edx, %eax
+; X86-BMI1-NEXT: xorl %edx, %edx
; X86-BMI1-NEXT: .LBB38_2:
-; X86-BMI1-NEXT: subl $8, %esp
-; X86-BMI1-NEXT: pushl %edi
-; X86-BMI1-NEXT: pushl %esi
-; X86-BMI1-NEXT: calll use64@PLT
-; X86-BMI1-NEXT: addl $16, %esp
-; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edi
-; X86-BMI1-NEXT: movl %esi, %eax
-; X86-BMI1-NEXT: movl %edi, %edx
-; X86-BMI1-NEXT: addl $4, %esp
+; X86-BMI1-NEXT: movl %edx, 4(%esi)
+; X86-BMI1-NEXT: movl %eax, (%esi)
+; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: popl %esi
-; X86-BMI1-NEXT: popl %edi
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi64_c4_commutative:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: pushl %edi
-; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: pushl %eax
-; X86-BMI2-NEXT: movb $64, %al
-; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movl $-1, %edi
-; X86-BMI2-NEXT: shrxl %eax, %edi, %esi
-; X86-BMI2-NEXT: testb $32, %al
+; X86-BMI2-NEXT: pushl %ebx
+; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT: movb $64, %bl
+; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl
+; X86-BMI2-NEXT: movl $-1, %eax
+; X86-BMI2-NEXT: shrxl %ebx, %eax, %edx
+; X86-BMI2-NEXT: testb $32, %bl
; X86-BMI2-NEXT: je .LBB38_2
; X86-BMI2-NEXT: # %bb.1:
-; X86-BMI2-NEXT: movl %esi, %edi
-; X86-BMI2-NEXT: xorl %esi, %esi
+; X86-BMI2-NEXT: movl %edx, %eax
+; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: .LBB38_2:
-; X86-BMI2-NEXT: subl $8, %esp
-; X86-BMI2-NEXT: pushl %esi
-; X86-BMI2-NEXT: pushl %edi
-; X86-BMI2-NEXT: calll use64@PLT
-; X86-BMI2-NEXT: addl $16, %esp
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi
-; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT: movl %edi, %eax
-; X86-BMI2-NEXT: movl %esi, %edx
-; X86-BMI2-NEXT: addl $4, %esp
-; X86-BMI2-NEXT: popl %esi
-; X86-BMI2-NEXT: popl %edi
+; X86-BMI2-NEXT: movl %edx, 4(%ecx)
+; X86-BMI2-NEXT: movl %eax, (%ecx)
+; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT: popl %ebx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_c4_commutative:
; X64-NOBMI: # %bb.0:
-; X64-NOBMI-NEXT: pushq %r14
-; X64-NOBMI-NEXT: pushq %rbx
-; X64-NOBMI-NEXT: pushq %rax
; X64-NOBMI-NEXT: movq %rsi, %rcx
-; X64-NOBMI-NEXT: movq %rdi, %r14
; X64-NOBMI-NEXT: negb %cl
-; X64-NOBMI-NEXT: movq $-1, %rbx
+; X64-NOBMI-NEXT: movq $-1, %rax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-NOBMI-NEXT: shrq %cl, %rbx
-; X64-NOBMI-NEXT: movq %rbx, %rdi
-; X64-NOBMI-NEXT: callq use64@PLT
-; X64-NOBMI-NEXT: andq %r14, %rbx
-; X64-NOBMI-NEXT: movq %rbx, %rax
-; X64-NOBMI-NEXT: addq $8, %rsp
-; X64-NOBMI-NEXT: popq %rbx
-; X64-NOBMI-NEXT: popq %r14
+; X64-NOBMI-NEXT: shrq %cl, %rax
+; X64-NOBMI-NEXT: movq %rax, (%rdx)
+; X64-NOBMI-NEXT: andq %rdi, %rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi64_c4_commutative:
; X64-BMI1: # %bb.0:
-; X64-BMI1-NEXT: pushq %r14
-; X64-BMI1-NEXT: pushq %rbx
-; X64-BMI1-NEXT: pushq %rax
; X64-BMI1-NEXT: movq %rsi, %rcx
-; X64-BMI1-NEXT: movq %rdi, %r14
; X64-BMI1-NEXT: negb %cl
-; X64-BMI1-NEXT: movq $-1, %rbx
+; X64-BMI1-NEXT: movq $-1, %rax
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx
-; X64-BMI1-NEXT: shrq %cl, %rbx
-; X64-BMI1-NEXT: movq %rbx, %rdi
-; X64-BMI1-NEXT: callq use64@PLT
-; X64-BMI1-NEXT: andq %r14, %rbx
-; X64-BMI1-NEXT: movq %rbx, %rax
-; X64-BMI1-NEXT: addq $8, %rsp
-; X64-BMI1-NEXT: popq %rbx
-; X64-BMI1-NEXT: popq %r14
+; X64-BMI1-NEXT: shrq %cl, %rax
+; X64-BMI1-NEXT: movq %rax, (%rdx)
+; X64-BMI1-NEXT: andq %rdi, %rax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi64_c4_commutative:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: pushq %r14
-; X64-BMI2-NEXT: pushq %rbx
-; X64-BMI2-NEXT: pushq %rax
-; X64-BMI2-NEXT: movq %rsi, %rbx
-; X64-BMI2-NEXT: movq %rdi, %r14
-; X64-BMI2-NEXT: movl %ebx, %eax
-; X64-BMI2-NEXT: negb %al
+; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax
+; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi
+; X64-BMI2-NEXT: negb %sil
; X64-BMI2-NEXT: movq $-1, %rcx
-; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi
-; X64-BMI2-NEXT: callq use64@PLT
-; X64-BMI2-NEXT: bzhiq %rbx, %r14, %rax
-; X64-BMI2-NEXT: addq $8, %rsp
-; X64-BMI2-NEXT: popq %rbx
-; X64-BMI2-NEXT: popq %r14
+; X64-BMI2-NEXT: shrxq %rsi, %rcx, %rcx
+; X64-BMI2-NEXT: movq %rcx, (%rdx)
; X64-BMI2-NEXT: retq
%numhighbits = sub i64 64, %numlowbits
%mask = lshr i64 -1, %numhighbits
- call void @use64(i64 %mask)
+ store i64 %mask, i64* %escape
%masked = and i64 %val, %mask ; swapped order
ret i64 %masked
}