[llvm] r349962 - [X86] Add isel patterns to match BMI/TBMI instructions when lowering has turned the root nodes into one of the flag producing binops.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 21 13:42:43 PST 2018
Author: ctopper
Date: Fri Dec 21 13:42:43 2018
New Revision: 349962
URL: http://llvm.org/viewvc/llvm-project?rev=349962&view=rev
Log:
[X86] Add isel patterns to match BMI/TBMI instructions when lowering has turned the root nodes into one of the flag producing binops.
This fixes the patterns that have or/xor as a root. 'and' is handled differently since they usually have a CMP wrapped around them.
I had to look for uses of the CF flag because all these nodes have non-standard CF flag behavior. A real or/xor would always clear CF. In practice we shouldn't be using the CF flag from these nodes as far as I know.
Differential Revision: https://reviews.llvm.org/D55813
Modified:
llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/trunk/lib/Target/X86/X86InstrInfo.td
llvm/trunk/test/CodeGen/X86/bmi.ll
llvm/trunk/test/CodeGen/X86/tbm_patterns.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=349962&r1=349961&r2=349962&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Fri Dec 21 13:42:43 2018
@@ -472,6 +472,9 @@ namespace {
SDValue &InFlag);
bool tryOptimizeRem8Extend(SDNode *N);
+
+ bool hasNoSignFlagUses(SDValue Flags) const;
+ bool hasNoCarryFlagUses(SDValue Flags) const;
};
}
@@ -2225,7 +2228,7 @@ static X86::CondCode getCondFromOpc(unsi
/// Test whether the given X86ISD::CMP node has any uses which require the SF
/// flag to be accurate.
-static bool hasNoSignFlagUses(SDValue Flags) {
+bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
// Examine each user of the node.
for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
UI != UE; ++UI) {
@@ -2265,7 +2268,7 @@ static bool hasNoSignFlagUses(SDValue Fl
/// Test whether the given node which sets flags has any uses which require the
/// CF flag to be accurate.
-static bool hasNoCarryFlagUses(SDValue Flags) {
+ bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
// Examine each user of the node.
for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
UI != UE; ++UI) {
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=349962&r1=349961&r2=349962&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Fri Dec 21 13:42:43 2018
@@ -2387,6 +2387,16 @@ let Predicates = [HasBMI], Defs = [EFLAG
// Pattern fragments to auto generate BMI instructions.
//===----------------------------------------------------------------------===//
+def or_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86or_flag node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
+def xor_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86xor_flag node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
let Predicates = [HasBMI] in {
// FIXME: patterns for the load versions are not implemented
def : Pat<(and GR32:$src, (add GR32:$src, -1)),
@@ -2403,6 +2413,14 @@ let Predicates = [HasBMI] in {
(BLSI32rr GR32:$src)>;
def : Pat<(and GR64:$src, (ineg GR64:$src)),
(BLSI64rr GR64:$src)>;
+
+ // Versions to match flag producing ops.
+ // X86and_flag nodes are rarely created. Those should use CMP+AND. We do
+ // TESTrr matching in PostProcessISelDAG to allow BLSR/BLSI to be formed.
+ def : Pat<(xor_flag_nocf GR32:$src, (add GR32:$src, -1)),
+ (BLSMSK32rr GR32:$src)>;
+ def : Pat<(xor_flag_nocf GR64:$src, (add GR64:$src, -1)),
+ (BLSMSK64rr GR64:$src)>;
}
multiclass bmi_bextr<bits<8> opc, string mnemonic, RegisterClass RC,
@@ -2801,6 +2819,45 @@ let Predicates = [HasTBM] in {
(TZMSK32rr GR32:$src)>;
def : Pat<(and (not GR64:$src), (add GR64:$src, -1)),
(TZMSK64rr GR64:$src)>;
+
+ // Patterns to match flag producing ops.
+ // X86and_flag nodes are rarely created. Those should use CMP+AND. We do
+ // TESTrr matching in PostProcessISelDAG to allow BLSR/BLSI to be formed.
+ def : Pat<(or_flag_nocf GR32:$src, (not (add GR32:$src, 1))),
+ (BLCI32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf GR64:$src, (not (add GR64:$src, 1))),
+ (BLCI64rr GR64:$src)>;
+
+ // Extra patterns because opt can optimize the above patterns to this.
+ def : Pat<(or_flag_nocf GR32:$src, (sub -2, GR32:$src)),
+ (BLCI32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf GR64:$src, (sub -2, GR64:$src)),
+ (BLCI64rr GR64:$src)>;
+
+ def : Pat<(xor_flag_nocf GR32:$src, (add GR32:$src, 1)),
+ (BLCMSK32rr GR32:$src)>;
+ def : Pat<(xor_flag_nocf GR64:$src, (add GR64:$src, 1)),
+ (BLCMSK64rr GR64:$src)>;
+
+ def : Pat<(or_flag_nocf GR32:$src, (add GR32:$src, 1)),
+ (BLCS32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf GR64:$src, (add GR64:$src, 1)),
+ (BLCS64rr GR64:$src)>;
+
+ def : Pat<(or_flag_nocf GR32:$src, (add GR32:$src, -1)),
+ (BLSFILL32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf GR64:$src, (add GR64:$src, -1)),
+ (BLSFILL64rr GR64:$src)>;
+
+ def : Pat<(or_flag_nocf (not GR32:$src), (add GR32:$src, -1)),
+ (BLSIC32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf (not GR64:$src), (add GR64:$src, -1)),
+ (BLSIC64rr GR64:$src)>;
+
+ def : Pat<(or_flag_nocf (not GR32:$src), (add GR32:$src, 1)),
+ (T1MSKC32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf (not GR64:$src), (add GR64:$src, 1)),
+ (T1MSKC64rr GR64:$src)>;
} // HasTBM
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/test/CodeGen/X86/bmi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bmi.ll?rev=349962&r1=349961&r2=349962&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bmi.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bmi.ll Fri Dec 21 13:42:43 2018
@@ -691,9 +691,7 @@ define i32 @blsmsk32_z(i32 %a, i32 %b) n
define i32 @blsmsk32_z2(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsmsk32_z2:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: leal -1(%eax), %ecx
-; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovel %eax, %ecx
@@ -703,9 +701,7 @@ define i32 @blsmsk32_z2(i32 %a, i32 %b,
; X64-LABEL: blsmsk32_z2:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: leal -1(%rdi), %ecx
-; X64-NEXT: xorl %edi, %ecx
+; X64-NEXT: blsmskl %edi, %ecx
; X64-NEXT: cmovnel %edx, %eax
; X64-NEXT: retq
%t0 = sub i32 %a, 1
@@ -800,8 +796,7 @@ define i64 @blsmsk64_z2(i64 %a, i64 %b,
; X64-LABEL: blsmsk64_z2:
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: leaq -1(%rdi), %rcx
-; X64-NEXT: xorq %rdi, %rcx
+; X64-NEXT: blsmskq %rdi, %rcx
; X64-NEXT: cmovneq %rdx, %rax
; X64-NEXT: retq
%t0 = sub i64 %a, 1
Modified: llvm/trunk/test/CodeGen/X86/tbm_patterns.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/tbm_patterns.ll?rev=349962&r1=349961&r2=349962&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/tbm_patterns.ll (original)
+++ llvm/trunk/test/CodeGen/X86/tbm_patterns.ll Fri Dec 21 13:42:43 2018
@@ -226,10 +226,7 @@ define i32 @test_x86_tbm_blci_u32_z2(i32
; CHECK-LABEL: test_x86_tbm_blci_u32_z2:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT: leal 1(%rdi), %ecx
-; CHECK-NEXT: notl %ecx
-; CHECK-NEXT: orl %edi, %ecx
+; CHECK-NEXT: blcil %edi, %ecx
; CHECK-NEXT: cmovnel %edx, %eax
; CHECK-NEXT: retq
%t0 = add i32 1, %a
@@ -269,9 +266,7 @@ define i64 @test_x86_tbm_blci_u64_z2(i64
; CHECK-LABEL: test_x86_tbm_blci_u64_z2:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: leaq 1(%rdi), %rcx
-; CHECK-NEXT: notq %rcx
-; CHECK-NEXT: orq %rdi, %rcx
+; CHECK-NEXT: blciq %rdi, %rcx
; CHECK-NEXT: cmovneq %rdx, %rax
; CHECK-NEXT: retq
%t0 = add i64 1, %a
@@ -409,9 +404,7 @@ define i32 @test_x86_tbm_blcmsk_u32_z2(i
; CHECK-LABEL: test_x86_tbm_blcmsk_u32_z2:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT: leal 1(%rdi), %ecx
-; CHECK-NEXT: xorl %edi, %ecx
+; CHECK-NEXT: blcmskl %edi, %ecx
; CHECK-NEXT: cmovnel %edx, %eax
; CHECK-NEXT: retq
%t0 = add i32 %a, 1
@@ -448,8 +441,7 @@ define i64 @test_x86_tbm_blcmsk_u64_z2(i
; CHECK-LABEL: test_x86_tbm_blcmsk_u64_z2:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: leaq 1(%rdi), %rcx
-; CHECK-NEXT: xorq %rdi, %rcx
+; CHECK-NEXT: blcmskq %rdi, %rcx
; CHECK-NEXT: cmovneq %rdx, %rax
; CHECK-NEXT: retq
%t0 = add i64 %a, 1
@@ -486,9 +478,7 @@ define i32 @test_x86_tbm_blcs_u32_z2(i32
; CHECK-LABEL: test_x86_tbm_blcs_u32_z2:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT: leal 1(%rdi), %ecx
-; CHECK-NEXT: orl %edi, %ecx
+; CHECK-NEXT: blcsl %edi, %ecx
; CHECK-NEXT: cmovnel %edx, %eax
; CHECK-NEXT: retq
%t0 = add i32 %a, 1
@@ -525,8 +515,7 @@ define i64 @test_x86_tbm_blcs_u64_z2(i64
; CHECK-LABEL: test_x86_tbm_blcs_u64_z2:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: leaq 1(%rdi), %rcx
-; CHECK-NEXT: orq %rdi, %rcx
+; CHECK-NEXT: blcsq %rdi, %rcx
; CHECK-NEXT: cmovneq %rdx, %rax
; CHECK-NEXT: retq
%t0 = add i64 %a, 1
@@ -563,9 +552,7 @@ define i32 @test_x86_tbm_blsfill_u32_z2(
; CHECK-LABEL: test_x86_tbm_blsfill_u32_z2:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT: leal -1(%rdi), %ecx
-; CHECK-NEXT: orl %edi, %ecx
+; CHECK-NEXT: blsfilll %edi, %ecx
; CHECK-NEXT: cmovnel %edx, %eax
; CHECK-NEXT: retq
%t0 = add i32 %a, -1
@@ -602,8 +589,7 @@ define i64 @test_x86_tbm_blsfill_u64_z2(
; CHECK-LABEL: test_x86_tbm_blsfill_u64_z2:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: leaq -1(%rdi), %rcx
-; CHECK-NEXT: orq %rdi, %rcx
+; CHECK-NEXT: blsfillq %rdi, %rcx
; CHECK-NEXT: cmovneq %rdx, %rax
; CHECK-NEXT: retq
%t0 = add i64 %a, -1
@@ -642,10 +628,7 @@ define i32 @test_x86_tbm_blsic_u32_z2(i3
; CHECK-LABEL: test_x86_tbm_blsic_u32_z2:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: movl %edi, %ecx
-; CHECK-NEXT: notl %ecx
-; CHECK-NEXT: decl %edi
-; CHECK-NEXT: orl %ecx, %edi
+; CHECK-NEXT: blsicl %edi, %ecx
; CHECK-NEXT: cmovnel %edx, %eax
; CHECK-NEXT: retq
%t0 = xor i32 %a, -1
@@ -685,10 +668,7 @@ define i64 @test_x86_tbm_blsic_u64_z2(i6
; CHECK-LABEL: test_x86_tbm_blsic_u64_z2:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: movq %rdi, %rcx
-; CHECK-NEXT: notq %rcx
-; CHECK-NEXT: decq %rdi
-; CHECK-NEXT: orq %rcx, %rdi
+; CHECK-NEXT: blsicq %rdi, %rcx
; CHECK-NEXT: cmovneq %rdx, %rax
; CHECK-NEXT: retq
%t0 = xor i64 %a, -1
@@ -728,10 +708,7 @@ define i32 @test_x86_tbm_t1mskc_u32_z2(i
; CHECK-LABEL: test_x86_tbm_t1mskc_u32_z2:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: movl %edi, %ecx
-; CHECK-NEXT: notl %ecx
-; CHECK-NEXT: incl %edi
-; CHECK-NEXT: orl %ecx, %edi
+; CHECK-NEXT: t1mskcl %edi, %ecx
; CHECK-NEXT: cmovnel %edx, %eax
; CHECK-NEXT: retq
%t0 = xor i32 %a, -1
@@ -771,10 +748,7 @@ define i64 @test_x86_tbm_t1mskc_u64_z2(i
; CHECK-LABEL: test_x86_tbm_t1mskc_u64_z2:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: movq %rdi, %rcx
-; CHECK-NEXT: notq %rcx
-; CHECK-NEXT: incq %rdi
-; CHECK-NEXT: orq %rcx, %rdi
+; CHECK-NEXT: t1mskcq %rdi, %rcx
; CHECK-NEXT: cmovneq %rdx, %rax
; CHECK-NEXT: retq
%t0 = xor i64 %a, -1
More information about the llvm-commits
mailing list