[llvm] 76fd79b - [X86] Recognize standalone `(1 << nbits) - 1` pattern as bzhi

Danila Malyutin via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 19 23:18:40 PDT 2023


Author: Danila Malyutin
Date: 2023-07-20T09:18:23+03:00
New Revision: 76fd79b9d537c5589f4e26dd7f5660d3c69d397a

URL: https://github.com/llvm/llvm-project/commit/76fd79b9d537c5589f4e26dd7f5660d3c69d397a
DIFF: https://github.com/llvm/llvm-project/commit/76fd79b9d537c5589f4e26dd7f5660d3c69d397a.diff

LOG: [X86] Recognize standalone `(1 << nbits) - 1` pattern as bzhi

This can be thought of as a subcase of `x & ((1 << nbits) - 1)` where x == -1.

Differential Revision: https://reviews.llvm.org/D155622

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
    llvm/test/CodeGen/X86/extract-bits.ll
    llvm/test/CodeGen/X86/extract-lowbits.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index ca064ee284f30c..4380f8c7ae922e 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -3504,9 +3504,11 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
 //   b) x & ~(-1 << nbits)
 //   c) x &  (-1 >> (32 - y))
 //   d) x << (32 - y) >> (32 - y)
+//   e) (1 << nbits) - 1
 bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
   assert(
-      (Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) &&
+      (Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::AND ||
+       Node->getOpcode() == ISD::SRL) &&
       "Should be either an and-mask, or right-shift after clearing high bits.");
 
   // BEXTR is BMI instruction, BZHI is BMI2 instruction. We need at least one.
@@ -3692,6 +3694,8 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
       if (!matchLowBitMask(Mask))
         return false;
     }
+  } else if (matchLowBitMask(SDValue(Node, 0))) {
+    X = CurDAG->getAllOnesConstant(SDLoc(Node), NVT);
   } else if (!matchPatternD(Node))
     return false;
 
@@ -5067,6 +5071,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
 
     [[fallthrough]];
   case ISD::ADD:
+    if (Opcode == ISD::ADD && matchBitExtract(Node))
+      return;
+    [[fallthrough]];
   case ISD::SUB: {
     // Try to avoid folding immediates with multiple uses for optsize.
     // This code tries to select to register form directly to avoid going

diff  --git a/llvm/test/CodeGen/X86/extract-bits.ll b/llvm/test/CodeGen/X86/extract-bits.ll
index 15d13e62b0fadb..638c4db756b445 100644
--- a/llvm/test/CodeGen/X86/extract-bits.ll
+++ b/llvm/test/CodeGen/X86/extract-bits.ll
@@ -4068,10 +4068,8 @@ define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
 ; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT:    movl %ebx, %eax
-; X86-BMI2-NEXT:    negb %al
-; X86-BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    movl $-1, %eax
+; X86-BMI2-NEXT:    bzhil %ebx, %eax, %eax
 ; X86-BMI2-NEXT:    movl %eax, (%esp)
 ; X86-BMI2-NEXT:    calll use32 at PLT
 ; X86-BMI2-NEXT:    bzhil %ebx, %esi, %eax
@@ -4131,10 +4129,8 @@ define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; X64-BMI2-NEXT:    pushq %rax
 ; X64-BMI2-NEXT:    movl %edx, %ebx
 ; X64-BMI2-NEXT:    shrxl %esi, %edi, %ebp
-; X64-BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI2-NEXT:    negb %al
-; X64-BMI2-NEXT:    movl $-1, %ecx
-; X64-BMI2-NEXT:    shrxl %eax, %ecx, %edi
+; X64-BMI2-NEXT:    movl $-1, %eax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %edi
 ; X64-BMI2-NEXT:    callq use32 at PLT
 ; X64-BMI2-NEXT:    bzhil %ebx, %ebp, %eax
 ; X64-BMI2-NEXT:    addq $8, %rsp
@@ -4202,10 +4198,8 @@ define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
 ; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT:    movl %ebx, %eax
-; X86-BMI2-NEXT:    negb %al
-; X86-BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    movl $-1, %eax
+; X86-BMI2-NEXT:    bzhil %ebx, %eax, %eax
 ; X86-BMI2-NEXT:    movl %eax, (%esp)
 ; X86-BMI2-NEXT:    calll use32 at PLT
 ; X86-BMI2-NEXT:    bzhil %ebx, %esi, %eax
@@ -4265,10 +4259,8 @@ define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X64-BMI2-NEXT:    pushq %rax
 ; X64-BMI2-NEXT:    movl %edx, %ebx
 ; X64-BMI2-NEXT:    shrxl %esi, %edi, %ebp
-; X64-BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI2-NEXT:    negb %al
-; X64-BMI2-NEXT:    movl $-1, %ecx
-; X64-BMI2-NEXT:    shrxl %eax, %ecx, %edi
+; X64-BMI2-NEXT:    movl $-1, %eax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %edi
 ; X64-BMI2-NEXT:    callq use32 at PLT
 ; X64-BMI2-NEXT:    bzhil %ebx, %ebp, %eax
 ; X64-BMI2-NEXT:    addq $8, %rsp
@@ -4341,10 +4333,8 @@ define i32 @bextr32_c2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    shrxl %ecx, (%eax), %esi
-; X86-BMI2-NEXT:    movl %ebx, %eax
-; X86-BMI2-NEXT:    negb %al
-; X86-BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    movl $-1, %eax
+; X86-BMI2-NEXT:    bzhil %ebx, %eax, %eax
 ; X86-BMI2-NEXT:    movl %eax, (%esp)
 ; X86-BMI2-NEXT:    calll use32 at PLT
 ; X86-BMI2-NEXT:    bzhil %ebx, %esi, %eax
@@ -4404,10 +4394,8 @@ define i32 @bextr32_c2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind
 ; X64-BMI2-NEXT:    pushq %rax
 ; X64-BMI2-NEXT:    movl %edx, %ebx
 ; X64-BMI2-NEXT:    shrxl %esi, (%rdi), %ebp
-; X64-BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI2-NEXT:    negb %al
-; X64-BMI2-NEXT:    movl $-1, %ecx
-; X64-BMI2-NEXT:    shrxl %eax, %ecx, %edi
+; X64-BMI2-NEXT:    movl $-1, %eax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %edi
 ; X64-BMI2-NEXT:    callq use32 at PLT
 ; X64-BMI2-NEXT:    bzhil %ebx, %ebp, %eax
 ; X64-BMI2-NEXT:    addq $8, %rsp
@@ -4479,10 +4467,8 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    shrxl %ecx, (%eax), %esi
-; X86-BMI2-NEXT:    movl %ebx, %eax
-; X86-BMI2-NEXT:    negb %al
-; X86-BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    movl $-1, %eax
+; X86-BMI2-NEXT:    bzhil %ebx, %eax, %eax
 ; X86-BMI2-NEXT:    movl %eax, (%esp)
 ; X86-BMI2-NEXT:    calll use32 at PLT
 ; X86-BMI2-NEXT:    bzhil %ebx, %esi, %eax
@@ -4542,10 +4528,8 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; X64-BMI2-NEXT:    pushq %rax
 ; X64-BMI2-NEXT:    movl %edx, %ebx
 ; X64-BMI2-NEXT:    shrxl %esi, (%rdi), %ebp
-; X64-BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI2-NEXT:    negb %al
-; X64-BMI2-NEXT:    movl $-1, %ecx
-; X64-BMI2-NEXT:    shrxl %eax, %ecx, %edi
+; X64-BMI2-NEXT:    movl $-1, %eax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %edi
 ; X64-BMI2-NEXT:    callq use32 at PLT
 ; X64-BMI2-NEXT:    bzhil %ebx, %ebp, %eax
 ; X64-BMI2-NEXT:    addq $8, %rsp
@@ -4616,10 +4600,8 @@ define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits)
 ; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
 ; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT:    movl %ebx, %eax
-; X86-BMI2-NEXT:    negb %al
-; X86-BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    movl $-1, %eax
+; X86-BMI2-NEXT:    bzhil %ebx, %eax, %eax
 ; X86-BMI2-NEXT:    movl %eax, (%esp)
 ; X86-BMI2-NEXT:    calll use32 at PLT
 ; X86-BMI2-NEXT:    bzhil %ebx, %esi, %eax
@@ -4679,10 +4661,8 @@ define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits)
 ; X64-BMI2-NEXT:    pushq %rax
 ; X64-BMI2-NEXT:    movl %edx, %ebx
 ; X64-BMI2-NEXT:    shrxl %esi, %edi, %ebp
-; X64-BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI2-NEXT:    negb %al
-; X64-BMI2-NEXT:    movl $-1, %ecx
-; X64-BMI2-NEXT:    shrxl %eax, %ecx, %edi
+; X64-BMI2-NEXT:    movl $-1, %eax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %edi
 ; X64-BMI2-NEXT:    callq use32 at PLT
 ; X64-BMI2-NEXT:    bzhil %ebx, %ebp, %eax
 ; X64-BMI2-NEXT:    addq $8, %rsp
@@ -4761,10 +4741,8 @@ define i32 @bextr32_c5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits
 ; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-BMI2-NEXT:    shrxl %edi, {{[0-9]+}}(%esp), %esi
-; X86-BMI2-NEXT:    movl %ebx, %eax
-; X86-BMI2-NEXT:    negb %al
-; X86-BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    movl $-1, %eax
+; X86-BMI2-NEXT:    bzhil %ebx, %eax, %eax
 ; X86-BMI2-NEXT:    movl %eax, (%esp)
 ; X86-BMI2-NEXT:    calll use32 at PLT
 ; X86-BMI2-NEXT:    bzhil %ebx, %esi, %esi
@@ -4833,10 +4811,8 @@ define i32 @bextr32_c5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits
 ; X64-BMI2-NEXT:    movl %edx, %ebx
 ; X64-BMI2-NEXT:    movl %esi, %ebp
 ; X64-BMI2-NEXT:    shrxl %esi, %edi, %r14d
-; X64-BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI2-NEXT:    negb %al
-; X64-BMI2-NEXT:    movl $-1, %ecx
-; X64-BMI2-NEXT:    shrxl %eax, %ecx, %edi
+; X64-BMI2-NEXT:    movl $-1, %eax
+; X64-BMI2-NEXT:    bzhil %edx, %eax, %edi
 ; X64-BMI2-NEXT:    callq use32 at PLT
 ; X64-BMI2-NEXT:    bzhil %ebx, %r14d, %ebx
 ; X64-BMI2-NEXT:    movl %ebp, %edi
@@ -5045,10 +5021,8 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; X64-BMI2-NEXT:    pushq %rax
 ; X64-BMI2-NEXT:    movq %rdx, %rbx
 ; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %r14
-; X64-BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI2-NEXT:    negb %al
-; X64-BMI2-NEXT:    movq $-1, %rcx
-; X64-BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI2-NEXT:    movq $-1, %rax
+; X64-BMI2-NEXT:    bzhiq %rdx, %rax, %rdi
 ; X64-BMI2-NEXT:    callq use64 at PLT
 ; X64-BMI2-NEXT:    bzhiq %rbx, %r14, %rax
 ; X64-BMI2-NEXT:    addq $8, %rsp
@@ -5252,10 +5226,8 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; X64-BMI2-NEXT:    movl %edx, %ebx
 ; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %r14
-; X64-BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI2-NEXT:    negb %al
-; X64-BMI2-NEXT:    movq $-1, %rcx
-; X64-BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI2-NEXT:    movq $-1, %rax
+; X64-BMI2-NEXT:    bzhiq %rbx, %rax, %rdi
 ; X64-BMI2-NEXT:    callq use64 at PLT
 ; X64-BMI2-NEXT:    bzhiq %rbx, %r14, %rax
 ; X64-BMI2-NEXT:    addq $8, %rsp
@@ -5463,10 +5435,8 @@ define i64 @bextr64_c2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind
 ; X64-BMI2-NEXT:    pushq %rax
 ; X64-BMI2-NEXT:    movq %rdx, %rbx
 ; X64-BMI2-NEXT:    shrxq %rsi, (%rdi), %r14
-; X64-BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI2-NEXT:    negb %al
-; X64-BMI2-NEXT:    movq $-1, %rcx
-; X64-BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI2-NEXT:    movq $-1, %rax
+; X64-BMI2-NEXT:    bzhiq %rdx, %rax, %rdi
 ; X64-BMI2-NEXT:    callq use64 at PLT
 ; X64-BMI2-NEXT:    bzhiq %rbx, %r14, %rax
 ; X64-BMI2-NEXT:    addq $8, %rsp
@@ -5674,10 +5644,8 @@ define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; X64-BMI2-NEXT:    movl %edx, %ebx
 ; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-BMI2-NEXT:    shrxq %rsi, (%rdi), %r14
-; X64-BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI2-NEXT:    negb %al
-; X64-BMI2-NEXT:    movq $-1, %rcx
-; X64-BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI2-NEXT:    movq $-1, %rax
+; X64-BMI2-NEXT:    bzhiq %rbx, %rax, %rdi
 ; X64-BMI2-NEXT:    callq use64 at PLT
 ; X64-BMI2-NEXT:    bzhiq %rbx, %r14, %rax
 ; X64-BMI2-NEXT:    addq $8, %rsp
@@ -5883,10 +5851,8 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits)
 ; X64-BMI2-NEXT:    pushq %rax
 ; X64-BMI2-NEXT:    movq %rdx, %rbx
 ; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %r14
-; X64-BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI2-NEXT:    negb %al
-; X64-BMI2-NEXT:    movq $-1, %rcx
-; X64-BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI2-NEXT:    movq $-1, %rax
+; X64-BMI2-NEXT:    bzhiq %rdx, %rax, %rdi
 ; X64-BMI2-NEXT:    callq use64 at PLT
 ; X64-BMI2-NEXT:    bzhiq %rbx, %r14, %rax
 ; X64-BMI2-NEXT:    addq $8, %rsp
@@ -6109,10 +6075,8 @@ define i64 @bextr64_c5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits
 ; X64-BMI2-NEXT:    movq %rdx, %rbx
 ; X64-BMI2-NEXT:    movq %rsi, %r14
 ; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %r15
-; X64-BMI2-NEXT:    movl %ebx, %eax
-; X64-BMI2-NEXT:    negb %al
-; X64-BMI2-NEXT:    movq $-1, %rcx
-; X64-BMI2-NEXT:    shrxq %rax, %rcx, %rdi
+; X64-BMI2-NEXT:    movq $-1, %rax
+; X64-BMI2-NEXT:    bzhiq %rdx, %rax, %rdi
 ; X64-BMI2-NEXT:    callq use64 at PLT
 ; X64-BMI2-NEXT:    bzhiq %rbx, %r15, %rbx
 ; X64-BMI2-NEXT:    movq %r14, %rdi

diff  --git a/llvm/test/CodeGen/X86/extract-lowbits.ll b/llvm/test/CodeGen/X86/extract-lowbits.ll
index 743e305117b756..848b920490ab83 100644
--- a/llvm/test/CodeGen/X86/extract-lowbits.ll
+++ b/llvm/test/CodeGen/X86/extract-lowbits.ll
@@ -2117,16 +2117,12 @@ define i32 @bzhi32_c0(i32 %val, i32 %numlowbits, ptr %escape) nounwind {
 ;
 ; X86-BMI2-LABEL: bzhi32_c0:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-BMI2-NEXT:    bzhil %edx, {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    # kill: def $dl killed $dl killed $edx def $edx
-; X86-BMI2-NEXT:    negb %dl
-; X86-BMI2-NEXT:    movl $-1, %esi
-; X86-BMI2-NEXT:    shrxl %edx, %esi, %edx
-; X86-BMI2-NEXT:    movl %edx, (%ecx)
-; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movl $-1, %edx
+; X86-BMI2-NEXT:    bzhil %ecx, %edx, %edx
+; X86-BMI2-NEXT:    movl %edx, (%eax)
+; X86-BMI2-NEXT:    bzhil %ecx, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_c0:
@@ -2153,12 +2149,10 @@ define i32 @bzhi32_c0(i32 %val, i32 %numlowbits, ptr %escape) nounwind {
 ;
 ; X64-BMI2-LABEL: bzhi32_c0:
 ; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movl $-1, %eax
+; X64-BMI2-NEXT:    bzhil %esi, %eax, %eax
+; X64-BMI2-NEXT:    movl %eax, (%rdx)
 ; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
-; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $esi def $esi
-; X64-BMI2-NEXT:    negb %sil
-; X64-BMI2-NEXT:    movl $-1, %ecx
-; X64-BMI2-NEXT:    shrxl %esi, %ecx, %ecx
-; X64-BMI2-NEXT:    movl %ecx, (%rdx)
 ; X64-BMI2-NEXT:    retq
   %numhighbits = sub i32 32, %numlowbits
   %mask = lshr i32 -1, %numhighbits
@@ -2194,16 +2188,12 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, ptr %escape) nounwind
 ;
 ; X86-BMI2-LABEL: bzhi32_c1_indexzext:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-BMI2-NEXT:    bzhil %edx, {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    # kill: def $dl killed $dl killed $edx def $edx
-; X86-BMI2-NEXT:    negb %dl
-; X86-BMI2-NEXT:    movl $-1, %esi
-; X86-BMI2-NEXT:    shrxl %edx, %esi, %edx
-; X86-BMI2-NEXT:    movl %edx, (%ecx)
-; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movl $-1, %edx
+; X86-BMI2-NEXT:    bzhil %ecx, %edx, %edx
+; X86-BMI2-NEXT:    movl %edx, (%eax)
+; X86-BMI2-NEXT:    bzhil %ecx, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_c1_indexzext:
@@ -2230,12 +2220,10 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, ptr %escape) nounwind
 ;
 ; X64-BMI2-LABEL: bzhi32_c1_indexzext:
 ; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movl $-1, %eax
+; X64-BMI2-NEXT:    bzhil %esi, %eax, %eax
+; X64-BMI2-NEXT:    movl %eax, (%rdx)
 ; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
-; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $esi def $esi
-; X64-BMI2-NEXT:    negb %sil
-; X64-BMI2-NEXT:    movl $-1, %ecx
-; X64-BMI2-NEXT:    shrxl %esi, %ecx, %ecx
-; X64-BMI2-NEXT:    movl %ecx, (%rdx)
 ; X64-BMI2-NEXT:    retq
   %numhighbits = sub i8 32, %numlowbits
   %sh_prom = zext i8 %numhighbits to i32
@@ -2284,12 +2272,10 @@ define i32 @bzhi32_c2_load(ptr %w, i32 %numlowbits, ptr %escape) nounwind {
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-BMI2-NEXT:    bzhil %edx, (%eax), %eax
-; X86-BMI2-NEXT:    # kill: def $dl killed $dl killed $edx def $edx
-; X86-BMI2-NEXT:    negb %dl
 ; X86-BMI2-NEXT:    movl $-1, %esi
-; X86-BMI2-NEXT:    shrxl %edx, %esi, %edx
-; X86-BMI2-NEXT:    movl %edx, (%ecx)
+; X86-BMI2-NEXT:    bzhil %edx, %esi, %esi
+; X86-BMI2-NEXT:    bzhil %edx, (%eax), %eax
+; X86-BMI2-NEXT:    movl %esi, (%ecx)
 ; X86-BMI2-NEXT:    popl %esi
 ; X86-BMI2-NEXT:    retl
 ;
@@ -2319,11 +2305,9 @@ define i32 @bzhi32_c2_load(ptr %w, i32 %numlowbits, ptr %escape) nounwind {
 ;
 ; X64-BMI2-LABEL: bzhi32_c2_load:
 ; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movl $-1, %eax
+; X64-BMI2-NEXT:    bzhil %esi, %eax, %ecx
 ; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
-; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $esi def $esi
-; X64-BMI2-NEXT:    negb %sil
-; X64-BMI2-NEXT:    movl $-1, %ecx
-; X64-BMI2-NEXT:    shrxl %esi, %ecx, %ecx
 ; X64-BMI2-NEXT:    movl %ecx, (%rdx)
 ; X64-BMI2-NEXT:    retq
   %val = load i32, ptr %w
@@ -2373,12 +2357,10 @@ define i32 @bzhi32_c3_load_indexzext(ptr %w, i8 %numlowbits, ptr %escape) nounwi
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-BMI2-NEXT:    bzhil %edx, (%eax), %eax
-; X86-BMI2-NEXT:    # kill: def $dl killed $dl killed $edx def $edx
-; X86-BMI2-NEXT:    negb %dl
 ; X86-BMI2-NEXT:    movl $-1, %esi
-; X86-BMI2-NEXT:    shrxl %edx, %esi, %edx
-; X86-BMI2-NEXT:    movl %edx, (%ecx)
+; X86-BMI2-NEXT:    bzhil %edx, %esi, %esi
+; X86-BMI2-NEXT:    bzhil %edx, (%eax), %eax
+; X86-BMI2-NEXT:    movl %esi, (%ecx)
 ; X86-BMI2-NEXT:    popl %esi
 ; X86-BMI2-NEXT:    retl
 ;
@@ -2408,11 +2390,9 @@ define i32 @bzhi32_c3_load_indexzext(ptr %w, i8 %numlowbits, ptr %escape) nounwi
 ;
 ; X64-BMI2-LABEL: bzhi32_c3_load_indexzext:
 ; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movl $-1, %eax
+; X64-BMI2-NEXT:    bzhil %esi, %eax, %ecx
 ; X64-BMI2-NEXT:    bzhil %esi, (%rdi), %eax
-; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $esi def $esi
-; X64-BMI2-NEXT:    negb %sil
-; X64-BMI2-NEXT:    movl $-1, %ecx
-; X64-BMI2-NEXT:    shrxl %esi, %ecx, %ecx
 ; X64-BMI2-NEXT:    movl %ecx, (%rdx)
 ; X64-BMI2-NEXT:    retq
   %val = load i32, ptr %w
@@ -2451,16 +2431,12 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, ptr %escape) nounwi
 ;
 ; X86-BMI2-LABEL: bzhi32_c4_commutative:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
-; X86-BMI2-NEXT:    bzhil %edx, {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    # kill: def $dl killed $dl killed $edx def $edx
-; X86-BMI2-NEXT:    negb %dl
-; X86-BMI2-NEXT:    movl $-1, %esi
-; X86-BMI2-NEXT:    shrxl %edx, %esi, %edx
-; X86-BMI2-NEXT:    movl %edx, (%ecx)
-; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movl $-1, %edx
+; X86-BMI2-NEXT:    bzhil %ecx, %edx, %edx
+; X86-BMI2-NEXT:    movl %edx, (%eax)
+; X86-BMI2-NEXT:    bzhil %ecx, {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_c4_commutative:
@@ -2487,12 +2463,10 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, ptr %escape) nounwi
 ;
 ; X64-BMI2-LABEL: bzhi32_c4_commutative:
 ; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movl $-1, %eax
+; X64-BMI2-NEXT:    bzhil %esi, %eax, %eax
+; X64-BMI2-NEXT:    movl %eax, (%rdx)
 ; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
-; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $esi def $esi
-; X64-BMI2-NEXT:    negb %sil
-; X64-BMI2-NEXT:    movl $-1, %ecx
-; X64-BMI2-NEXT:    shrxl %esi, %ecx, %ecx
-; X64-BMI2-NEXT:    movl %ecx, (%rdx)
 ; X64-BMI2-NEXT:    retq
   %numhighbits = sub i32 32, %numlowbits
   %mask = lshr i32 -1, %numhighbits
@@ -2593,12 +2567,10 @@ define i64 @bzhi64_c0(i64 %val, i64 %numlowbits, ptr %escape) nounwind {
 ;
 ; X64-BMI2-LABEL: bzhi64_c0:
 ; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movq $-1, %rax
+; X64-BMI2-NEXT:    bzhiq %rsi, %rax, %rax
+; X64-BMI2-NEXT:    movq %rax, (%rdx)
 ; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
-; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $rsi def $rsi
-; X64-BMI2-NEXT:    negb %sil
-; X64-BMI2-NEXT:    movq $-1, %rcx
-; X64-BMI2-NEXT:    shrxq %rsi, %rcx, %rcx
-; X64-BMI2-NEXT:    movq %rcx, (%rdx)
 ; X64-BMI2-NEXT:    retq
   %numhighbits = sub i64 64, %numlowbits
   %mask = lshr i64 -1, %numhighbits
@@ -2698,12 +2670,10 @@ define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits, ptr %escape) nounwind
 ; X64-BMI2-LABEL: bzhi64_c1_indexzext:
 ; X64-BMI2:       # %bb.0:
 ; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI2-NEXT:    movq $-1, %rax
+; X64-BMI2-NEXT:    bzhiq %rsi, %rax, %rax
+; X64-BMI2-NEXT:    movq %rax, (%rdx)
 ; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
-; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $rsi def $rsi
-; X64-BMI2-NEXT:    negb %sil
-; X64-BMI2-NEXT:    movq $-1, %rcx
-; X64-BMI2-NEXT:    shrxq %rsi, %rcx, %rcx
-; X64-BMI2-NEXT:    movq %rcx, (%rdx)
 ; X64-BMI2-NEXT:    retq
   %numhighbits = sub i8 64, %numlowbits
   %sh_prom = zext i8 %numhighbits to i64
@@ -2824,11 +2794,9 @@ define i64 @bzhi64_c2_load(ptr %w, i64 %numlowbits, ptr %escape) nounwind {
 ;
 ; X64-BMI2-LABEL: bzhi64_c2_load:
 ; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movq $-1, %rax
+; X64-BMI2-NEXT:    bzhiq %rsi, %rax, %rcx
 ; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
-; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $rsi def $rsi
-; X64-BMI2-NEXT:    negb %sil
-; X64-BMI2-NEXT:    movq $-1, %rcx
-; X64-BMI2-NEXT:    shrxq %rsi, %rcx, %rcx
 ; X64-BMI2-NEXT:    movq %rcx, (%rdx)
 ; X64-BMI2-NEXT:    retq
   %val = load i64, ptr %w
@@ -2951,11 +2919,9 @@ define i64 @bzhi64_c3_load_indexzext(ptr %w, i8 %numlowbits, ptr %escape) nounwi
 ; X64-BMI2-LABEL: bzhi64_c3_load_indexzext:
 ; X64-BMI2:       # %bb.0:
 ; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI2-NEXT:    movq $-1, %rax
+; X64-BMI2-NEXT:    bzhiq %rsi, %rax, %rcx
 ; X64-BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
-; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $rsi def $rsi
-; X64-BMI2-NEXT:    negb %sil
-; X64-BMI2-NEXT:    movq $-1, %rcx
-; X64-BMI2-NEXT:    shrxq %rsi, %rcx, %rcx
 ; X64-BMI2-NEXT:    movq %rcx, (%rdx)
 ; X64-BMI2-NEXT:    retq
   %val = load i64, ptr %w
@@ -3057,12 +3023,10 @@ define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits, ptr %escape) nounwi
 ;
 ; X64-BMI2-LABEL: bzhi64_c4_commutative:
 ; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movq $-1, %rax
+; X64-BMI2-NEXT:    bzhiq %rsi, %rax, %rax
+; X64-BMI2-NEXT:    movq %rax, (%rdx)
 ; X64-BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
-; X64-BMI2-NEXT:    # kill: def $sil killed $sil killed $rsi def $rsi
-; X64-BMI2-NEXT:    negb %sil
-; X64-BMI2-NEXT:    movq $-1, %rcx
-; X64-BMI2-NEXT:    shrxq %rsi, %rcx, %rcx
-; X64-BMI2-NEXT:    movq %rcx, (%rdx)
 ; X64-BMI2-NEXT:    retq
   %numhighbits = sub i64 64, %numlowbits
   %mask = lshr i64 -1, %numhighbits


        


More information about the llvm-commits mailing list