[llvm] r334125 - [X86] Emit BZHI when mask is ~(-1 << nbits)

Roman Lebedev via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 6 12:38:16 PDT 2018


Author: lebedevri
Date: Wed Jun  6 12:38:16 2018
New Revision: 334125

URL: http://llvm.org/viewvc/llvm-project?rev=334125&view=rev
Log:
[X86] Emit BZHI when mask is ~(-1 << nbits)

Summary:
In D47428, I propose choosing `~(-1 << nbits)` as the canonical form of low-bit-mask formation.
As can be seen from these tests, there is a reason for that.

AArch64 currently handles `~(-1 << nbits)` better than the more traditional `(1 << nbits) - 1` (sic!).
For X86 it is the other way around.
It would be much better to canonicalize.

This patch is pure monkey-typing.
I don't really understand how this works :)
I have based it on the existing `// x & (-1 >> (32 - y))` pattern.

Also, when we only have `BMI`, I wonder if we could use `BEXTR` with `start=0`?

Related links:
https://bugs.llvm.org/show_bug.cgi?id=36419
https://bugs.llvm.org/show_bug.cgi?id=37603
https://bugs.llvm.org/show_bug.cgi?id=37610
https://rise4fun.com/Alive/idM

Reviewers: craig.topper, spatel, RKSimon, javed.absar

Reviewed By: craig.topper

Subscribers: kristof.beyls, llvm-commits

Differential Revision: https://reviews.llvm.org/D47453

Modified:
    llvm/trunk/lib/Target/X86/X86InstrInfo.td
    llvm/trunk/test/CodeGen/X86/extract-lowbits.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=334125&r1=334124&r2=334125&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Wed Jun  6 12:38:16 2018
@@ -2448,21 +2448,49 @@ let Predicates = [HasBMI2, NoTBM] in {
 }
 
 let Predicates = [HasBMI2] in {
-  def : Pat<(and GR32:$src, (add (shl 1, GR8:$lz), -1)),
-            (BZHI32rr GR32:$src,
-              (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
-
-  def : Pat<(and (loadi32 addr:$src), (add (shl 1, GR8:$lz), -1)),
-            (BZHI32rm addr:$src,
-              (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
-
-  def : Pat<(and GR64:$src, (add (shl 1, GR8:$lz), -1)),
-            (BZHI64rr GR64:$src,
-              (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
-
-  def : Pat<(and (loadi64 addr:$src), (add (shl 1, GR8:$lz), -1)),
-            (BZHI64rm addr:$src,
-              (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
+  multiclass _bmi_bzhi_pattern<dag regpattern, dag mempattern, RegisterClass RC,
+                               ValueType VT, Instruction DstInst,
+                               Instruction DstMemInst> {
+    def : Pat<regpattern,
+              (DstInst RC:$src,
+                (INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
+    def : Pat<mempattern,
+              (DstMemInst addr:$src,
+                (INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
+  }
+
+  multiclass bmi_bzhi_patterns<RegisterClass RC, int bitwidth, ValueType VT,
+                               Instruction DstInst, X86MemOperand x86memop,
+                               Instruction DstMemInst> {
+    // x & ((1 << y) - 1)
+    defm : _bmi_bzhi_pattern<(and RC:$src, (add (shl 1, GR8:$lz), -1)),
+                             (and (x86memop addr:$src),
+                                  (add (shl 1, GR8:$lz), -1)),
+                             RC, VT, DstInst, DstMemInst>;
+
+    // x & ~(-1 << y)
+    defm : _bmi_bzhi_pattern<(and RC:$src, (xor (shl -1, GR8:$lz), -1)),
+                             (and (x86memop addr:$src),
+                                  (xor (shl -1, GR8:$lz), -1)),
+                             RC, VT, DstInst, DstMemInst>;
+
+    // x & (-1 >> (bitwidth - y))
+    defm : _bmi_bzhi_pattern<(and RC:$src, (srl -1, (sub bitwidth, GR8:$lz))),
+                             (and (x86memop addr:$src),
+                                  (srl -1, (sub bitwidth, GR8:$lz))),
+                             RC, VT, DstInst, DstMemInst>;
+
+    // x << (bitwidth - y) >> (bitwidth - y)
+    defm : _bmi_bzhi_pattern<(srl (shl RC:$src, (sub bitwidth, GR8:$lz)),
+                                  (sub bitwidth, GR8:$lz)),
+                             (srl (shl (x86memop addr:$src),
+                                        (sub bitwidth, GR8:$lz)),
+                                  (sub bitwidth, GR8:$lz)),
+                             RC, VT, DstInst, DstMemInst>;
+  }
+
+  defm : bmi_bzhi_patterns<GR32, 32, i32, BZHI32rr, loadi32, BZHI32rm>;
+  defm : bmi_bzhi_patterns<GR64, 64, i64, BZHI64rr, loadi64, BZHI64rm>;
 
   // x & (-1 >> (32 - y))
   def : Pat<(and GR32:$src, (srl -1, (i8 (trunc (sub 32, GR32:$lz))))),

Modified: llvm/trunk/test/CodeGen/X86/extract-lowbits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/extract-lowbits.ll?rev=334125&r1=334124&r2=334125&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/extract-lowbits.ll (original)
+++ llvm/trunk/test/CodeGen/X86/extract-lowbits.ll Wed Jun  6 12:38:16 2018
@@ -537,9 +537,7 @@ define i32 @bzhi32_b0(i32 %val, i32 %num
 ; X86-BMI1BMI2-LABEL: bzhi32_b0:
 ; X86-BMI1BMI2:       # %bb.0:
 ; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI1BMI2-NEXT:    shlxl %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_b0:
@@ -553,9 +551,7 @@ define i32 @bzhi32_b0(i32 %val, i32 %num
 ;
 ; X64-BMI1BMI2-LABEL: bzhi32_b0:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    movl $-1, %eax
-; X64-BMI1BMI2-NEXT:    shlxl %esi, %eax, %eax
-; X64-BMI1BMI2-NEXT:    andnl %edi, %eax, %eax
+; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
 ; X64-BMI1BMI2-NEXT:    retq
   %notmask = shl i32 -1, %numlowbits
   %mask = xor i32 %notmask, -1
@@ -576,9 +572,7 @@ define i32 @bzhi32_b1_indexzext(i32 %val
 ; X86-BMI1BMI2-LABEL: bzhi32_b1_indexzext:
 ; X86-BMI1BMI2:       # %bb.0:
 ; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI1BMI2-NEXT:    shlxl %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_b1_indexzext:
@@ -592,9 +586,7 @@ define i32 @bzhi32_b1_indexzext(i32 %val
 ;
 ; X64-BMI1BMI2-LABEL: bzhi32_b1_indexzext:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    movl $-1, %eax
-; X64-BMI1BMI2-NEXT:    shlxl %esi, %eax, %eax
-; X64-BMI1BMI2-NEXT:    andnl %edi, %eax, %eax
+; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
 ; X64-BMI1BMI2-NEXT:    retq
   %conv = zext i8 %numlowbits to i32
   %notmask = shl i32 -1, %conv
@@ -618,9 +610,7 @@ define i32 @bzhi32_b2_load(i32* %w, i32
 ; X86-BMI1BMI2:       # %bb.0:
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl $-1, %edx
-; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edx, %ecx
-; X86-BMI1BMI2-NEXT:    andnl (%eax), %ecx, %eax
+; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %eax
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_b2_load:
@@ -634,9 +624,7 @@ define i32 @bzhi32_b2_load(i32* %w, i32
 ;
 ; X64-BMI1BMI2-LABEL: bzhi32_b2_load:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    movl $-1, %eax
-; X64-BMI1BMI2-NEXT:    shlxl %esi, %eax, %eax
-; X64-BMI1BMI2-NEXT:    andnl (%rdi), %eax, %eax
+; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %eax
 ; X64-BMI1BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %notmask = shl i32 -1, %numlowbits
@@ -660,9 +648,7 @@ define i32 @bzhi32_b3_load_indexzext(i32
 ; X86-BMI1BMI2:       # %bb.0:
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    movl $-1, %edx
-; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edx, %ecx
-; X86-BMI1BMI2-NEXT:    andnl (%eax), %ecx, %eax
+; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %eax
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_b3_load_indexzext:
@@ -676,9 +662,7 @@ define i32 @bzhi32_b3_load_indexzext(i32
 ;
 ; X64-BMI1BMI2-LABEL: bzhi32_b3_load_indexzext:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    movl $-1, %eax
-; X64-BMI1BMI2-NEXT:    shlxl %esi, %eax, %eax
-; X64-BMI1BMI2-NEXT:    andnl (%rdi), %eax, %eax
+; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %eax
 ; X64-BMI1BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %conv = zext i8 %numlowbits to i32
@@ -701,9 +685,7 @@ define i32 @bzhi32_b4_commutative(i32 %v
 ; X86-BMI1BMI2-LABEL: bzhi32_b4_commutative:
 ; X86-BMI1BMI2:       # %bb.0:
 ; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI1BMI2-NEXT:    shlxl %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_b4_commutative:
@@ -717,9 +699,7 @@ define i32 @bzhi32_b4_commutative(i32 %v
 ;
 ; X64-BMI1BMI2-LABEL: bzhi32_b4_commutative:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    movl $-1, %eax
-; X64-BMI1BMI2-NEXT:    shlxl %esi, %eax, %eax
-; X64-BMI1BMI2-NEXT:    andnl %edi, %eax, %eax
+; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
 ; X64-BMI1BMI2-NEXT:    retq
   %notmask = shl i32 -1, %numlowbits
   %mask = xor i32 %notmask, -1
@@ -778,9 +758,7 @@ define i64 @bzhi64_b0(i64 %val, i64 %num
 ;
 ; X64-BMI1BMI2-LABEL: bzhi64_b0:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    movq $-1, %rax
-; X64-BMI1BMI2-NEXT:    shlxq %rsi, %rax, %rax
-; X64-BMI1BMI2-NEXT:    andnq %rdi, %rax, %rax
+; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
 ; X64-BMI1BMI2-NEXT:    retq
   %notmask = shl i64 -1, %numlowbits
   %mask = xor i64 %notmask, -1
@@ -838,9 +816,7 @@ define i64 @bzhi64_b1_indexzext(i64 %val
 ; X64-BMI1BMI2-LABEL: bzhi64_b1_indexzext:
 ; X64-BMI1BMI2:       # %bb.0:
 ; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1BMI2-NEXT:    movq $-1, %rax
-; X64-BMI1BMI2-NEXT:    shlxq %rsi, %rax, %rax
-; X64-BMI1BMI2-NEXT:    andnq %rdi, %rax, %rax
+; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
 ; X64-BMI1BMI2-NEXT:    retq
   %conv = zext i8 %numlowbits to i64
   %notmask = shl i64 -1, %conv
@@ -902,9 +878,7 @@ define i64 @bzhi64_b2_load(i64* %w, i64
 ;
 ; X64-BMI1BMI2-LABEL: bzhi64_b2_load:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    movq $-1, %rax
-; X64-BMI1BMI2-NEXT:    shlxq %rsi, %rax, %rax
-; X64-BMI1BMI2-NEXT:    andnq (%rdi), %rax, %rax
+; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
 ; X64-BMI1BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %notmask = shl i64 -1, %numlowbits
@@ -967,9 +941,7 @@ define i64 @bzhi64_b3_load_indexzext(i64
 ; X64-BMI1BMI2-LABEL: bzhi64_b3_load_indexzext:
 ; X64-BMI1BMI2:       # %bb.0:
 ; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-BMI1BMI2-NEXT:    movq $-1, %rax
-; X64-BMI1BMI2-NEXT:    shlxq %rsi, %rax, %rax
-; X64-BMI1BMI2-NEXT:    andnq (%rdi), %rax, %rax
+; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
 ; X64-BMI1BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %conv = zext i8 %numlowbits to i64
@@ -1028,9 +1000,7 @@ define i64 @bzhi64_b4_commutative(i64 %v
 ;
 ; X64-BMI1BMI2-LABEL: bzhi64_b4_commutative:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    movq $-1, %rax
-; X64-BMI1BMI2-NEXT:    shlxq %rsi, %rax, %rax
-; X64-BMI1BMI2-NEXT:    andnq %rdi, %rax, %rax
+; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
 ; X64-BMI1BMI2-NEXT:    retq
   %notmask = shl i64 -1, %numlowbits
   %mask = xor i64 %notmask, -1
@@ -1091,11 +1061,8 @@ define i32 @bzhi32_c1_indexzext(i32 %val
 ;
 ; X86-BMI1BMI2-LABEL: bzhi32_c1_indexzext:
 ; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb $32, %al
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_c1_indexzext:
@@ -1109,11 +1076,7 @@ define i32 @bzhi32_c1_indexzext(i32 %val
 ;
 ; X64-BMI1BMI2-LABEL: bzhi32_c1_indexzext:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    movb $32, %al
-; X64-BMI1BMI2-NEXT:    subb %sil, %al
-; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
-; X64-BMI1BMI2-NEXT:    andl %edi, %eax
+; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
 ; X64-BMI1BMI2-NEXT:    retq
   %numhighbits = sub i8 32, %numlowbits
   %sh_prom = zext i8 %numhighbits to i32
@@ -1175,12 +1138,9 @@ define i32 @bzhi32_c3_load_indexzext(i32
 ;
 ; X86-BMI1BMI2-LABEL: bzhi32_c3_load_indexzext:
 ; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    movb $32, %al
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    movl $-1, %edx
-; X86-BMI1BMI2-NEXT:    shrxl %eax, %edx, %eax
-; X86-BMI1BMI2-NEXT:    andl (%ecx), %eax
+; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %eax
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_c3_load_indexzext:
@@ -1194,11 +1154,7 @@ define i32 @bzhi32_c3_load_indexzext(i32
 ;
 ; X64-BMI1BMI2-LABEL: bzhi32_c3_load_indexzext:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    movb $32, %al
-; X64-BMI1BMI2-NEXT:    subb %sil, %al
-; X64-BMI1BMI2-NEXT:    movl $-1, %ecx
-; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
-; X64-BMI1BMI2-NEXT:    andl (%rdi), %eax
+; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %eax
 ; X64-BMI1BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %numhighbits = sub i8 32, %numlowbits
@@ -1350,11 +1306,8 @@ define i64 @bzhi64_c1_indexzext(i64 %val
 ;
 ; X64-BMI1BMI2-LABEL: bzhi64_c1_indexzext:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    movb $64, %al
-; X64-BMI1BMI2-NEXT:    subb %sil, %al
-; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
-; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rax
-; X64-BMI1BMI2-NEXT:    andq %rdi, %rax
+; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
 ; X64-BMI1BMI2-NEXT:    retq
   %numhighbits = sub i8 64, %numlowbits
   %sh_prom = zext i8 %numhighbits to i64
@@ -1479,11 +1432,8 @@ define i64 @bzhi64_c3_load_indexzext(i64
 ;
 ; X64-BMI1BMI2-LABEL: bzhi64_c3_load_indexzext:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    movb $64, %al
-; X64-BMI1BMI2-NEXT:    subb %sil, %al
-; X64-BMI1BMI2-NEXT:    movq $-1, %rcx
-; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rax
-; X64-BMI1BMI2-NEXT:    andq (%rdi), %rax
+; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
 ; X64-BMI1BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %numhighbits = sub i8 64, %numlowbits
@@ -1602,10 +1552,8 @@ define i32 @bzhi32_d1_indexzext(i32 %val
 ;
 ; X86-BMI1BMI2-LABEL: bzhi32_d1_indexzext:
 ; X86-BMI1BMI2:       # %bb.0:
-; X86-BMI1BMI2-NEXT:    movb $32, %al
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_d1_indexzext:
@@ -1619,10 +1567,7 @@ define i32 @bzhi32_d1_indexzext(i32 %val
 ;
 ; X64-BMI1BMI2-LABEL: bzhi32_d1_indexzext:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    movb $32, %al
-; X64-BMI1BMI2-NEXT:    subb %sil, %al
-; X64-BMI1BMI2-NEXT:    shlxl %eax, %edi, %ecx
-; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
 ; X64-BMI1BMI2-NEXT:    retq
   %numhighbits = sub i8 32, %numlowbits
   %sh_prom = zext i8 %numhighbits to i32
@@ -1685,10 +1630,8 @@ define i32 @bzhi32_d3_load_indexzext(i32
 ; X86-BMI1BMI2-LABEL: bzhi32_d3_load_indexzext:
 ; X86-BMI1BMI2:       # %bb.0:
 ; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT:    movb $32, %cl
-; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT:    shlxl %ecx, (%eax), %eax
-; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %eax
+; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %eax
 ; X86-BMI1BMI2-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: bzhi32_d3_load_indexzext:
@@ -1702,10 +1645,7 @@ define i32 @bzhi32_d3_load_indexzext(i32
 ;
 ; X64-BMI1BMI2-LABEL: bzhi32_d3_load_indexzext:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    movb $32, %al
-; X64-BMI1BMI2-NEXT:    subb %sil, %al
-; X64-BMI1BMI2-NEXT:    shlxl %eax, (%rdi), %ecx
-; X64-BMI1BMI2-NEXT:    shrxl %eax, %ecx, %eax
+; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %eax
 ; X64-BMI1BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %numhighbits = sub i8 32, %numlowbits
@@ -1892,10 +1832,8 @@ define i64 @bzhi64_d1_indexzext(i64 %val
 ;
 ; X64-BMI1BMI2-LABEL: bzhi64_d1_indexzext:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    movb $64, %al
-; X64-BMI1BMI2-NEXT:    subb %sil, %al
-; X64-BMI1BMI2-NEXT:    shlxq %rax, %rdi, %rcx
-; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rax
+; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
 ; X64-BMI1BMI2-NEXT:    retq
   %numhighbits = sub i8 64, %numlowbits
   %sh_prom = zext i8 %numhighbits to i64
@@ -2084,10 +2022,8 @@ define i64 @bzhi64_d3_load_indexzext(i64
 ;
 ; X64-BMI1BMI2-LABEL: bzhi64_d3_load_indexzext:
 ; X64-BMI1BMI2:       # %bb.0:
-; X64-BMI1BMI2-NEXT:    movb $64, %al
-; X64-BMI1BMI2-NEXT:    subb %sil, %al
-; X64-BMI1BMI2-NEXT:    shlxq %rax, (%rdi), %rcx
-; X64-BMI1BMI2-NEXT:    shrxq %rax, %rcx, %rax
+; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
 ; X64-BMI1BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %numhighbits = sub i8 64, %numlowbits




More information about the llvm-commits mailing list