[llvm] bc110de - [SelectionDAG] don't split branch on logic-of-vector-compares
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 2 14:05:33 PDT 2020
Author: Sanjay Patel
Date: 2020-07-02T17:05:24-04:00
New Revision: bc110de78a4bf47f63267eae07ef02f14bcc78e3
URL: https://github.com/llvm/llvm-project/commit/bc110de78a4bf47f63267eae07ef02f14bcc78e3
DIFF: https://github.com/llvm/llvm-project/commit/bc110de78a4bf47f63267eae07ef02f14bcc78e3.diff
LOG: [SelectionDAG] don't split branch on logic-of-vector-compares
SelectionDAGBuilder converts logic-of-compares into multiple branches based
on a boolean TLI setting in isJumpExpensive(). But that transform probably never
considered the pattern of bools extracted from a vector compare - it seems
unlikely that we would want to turn vector logic into control-flow.
The motivating x86 reduction case is shown in PR44565:
https://bugs.llvm.org/show_bug.cgi?id=44565
...and that test shows the expected improvement from using pmovmsk codegen.
For AArch64, I modified the test to include an extra op because the simpler
test gets transformed by a codegen invocation of SimplifyCFG.
Differential Revision: https://reviews.llvm.org/D82602
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/test/CodeGen/AArch64/vec-extract-branch.ll
llvm/test/CodeGen/X86/setcc-logic.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index c7f1780770dc..1645a1f136bc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2303,7 +2303,9 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// If this is a series of conditions that are or'd or and'd together, emit
// this as a sequence of branches instead of setcc's with and/or operations.
- // As long as jumps are not expensive, this should improve performance.
+ // As long as jumps are not expensive (exceptions for multi-use logic ops,
+ // unpredictable branches, and vector extracts because those jumps are likely
+ // expensive for any target), this should improve performance.
// For example, instead of something like:
// cmp A, B
// C = seteq
@@ -2318,9 +2320,12 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// jle foo
if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
Instruction::BinaryOps Opcode = BOp->getOpcode();
+ Value *Vec, *BOp0 = BOp->getOperand(0), *BOp1 = BOp->getOperand(1);
if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
!I.hasMetadata(LLVMContext::MD_unpredictable) &&
- (Opcode == Instruction::And || Opcode == Instruction::Or)) {
+ (Opcode == Instruction::And || Opcode == Instruction::Or) &&
+ !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
+ match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
Opcode,
getEdgeProbability(BrMBB, Succ0MBB),
diff --git a/llvm/test/CodeGen/AArch64/vec-extract-branch.ll b/llvm/test/CodeGen/AArch64/vec-extract-branch.ll
index 22f6c86a07fb..e05213d53421 100644
--- a/llvm/test/CodeGen/AArch64/vec-extract-branch.ll
+++ b/llvm/test/CodeGen/AArch64/vec-extract-branch.ll
@@ -6,16 +6,15 @@ define i32 @vec_extract_branch(<2 x double> %x, i32 %y) {
; CHECK: // %bb.0:
; CHECK-NEXT: fcmgt v0.2d, v0.2d, #0.0
; CHECK-NEXT: xtn v0.2s, v0.2d
-; CHECK-NEXT: fmov w8, s0
-; CHECK-NEXT: tbz w8, #0, .LBB0_3
-; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: mov w8, v0.s[1]
-; CHECK-NEXT: tbz w8, #0, .LBB0_3
-; CHECK-NEXT: // %bb.2: // %true
+; CHECK-NEXT: fmov w9, s0
+; CHECK-NEXT: and w8, w9, w8
+; CHECK-NEXT: tbz w8, #0, .LBB0_2
+; CHECK-NEXT: // %bb.1: // %true
; CHECK-NEXT: mov w8, #42
; CHECK-NEXT: sdiv w0, w8, w0
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB0_3: // %false
+; CHECK-NEXT: .LBB0_2: // %false
; CHECK-NEXT: mov w0, #88
; CHECK-NEXT: ret
%t1 = fcmp ogt <2 x double> %x, zeroinitializer
diff --git a/llvm/test/CodeGen/X86/setcc-logic.ll b/llvm/test/CodeGen/X86/setcc-logic.ll
index a878a33d270c..59e0efc18c87 100644
--- a/llvm/test/CodeGen/X86/setcc-logic.ll
+++ b/llvm/test/CodeGen/X86/setcc-logic.ll
@@ -323,15 +323,12 @@ define i32 @vec_extract_branch(<2 x double> %x) {
; CHECK-NEXT: xorpd %xmm1, %xmm1
; CHECK-NEXT: cmpltpd %xmm0, %xmm1
; CHECK-NEXT: movmskpd %xmm1, %eax
-; CHECK-NEXT: testb $1, %al
-; CHECK-NEXT: je .LBB16_3
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: shrb %al
-; CHECK-NEXT: je .LBB16_3
-; CHECK-NEXT: # %bb.2: # %true
+; CHECK-NEXT: cmpb $3, %al
+; CHECK-NEXT: jne .LBB16_2
+; CHECK-NEXT: # %bb.1: # %true
; CHECK-NEXT: movl $42, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB16_3: # %false
+; CHECK-NEXT: .LBB16_2: # %false
; CHECK-NEXT: movl $88, %eax
; CHECK-NEXT: retq
%t1 = fcmp ogt <2 x double> %x, zeroinitializer
More information about the llvm-commits mailing list