[llvm] r258847 - Allow X86::COND_NE_OR_P and X86::COND_NP_OR_E to be reversed.

Cong Hou via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 26 12:08:02 PST 2016


Author: conghou
Date: Tue Jan 26 14:08:01 2016
New Revision: 258847

URL: http://llvm.org/viewvc/llvm-project?rev=258847&view=rev
Log:
Allow X86::COND_NE_OR_P and X86::COND_NP_OR_E to be reversed.

Currently, AnalyzeBranch() fails non-equality comparison between floating points
on X86 (see https://llvm.org/bugs/show_bug.cgi?id=23875). This is because this
function can modify the branch by reversing the conditional jump and removing
unconditional jump if there is a proper fall-through. However, in the case of
non-equality comparison between floating points, this can turn the branch
"unanalyzable". Consider the following case:

jne.BB1
jp.BB1
jmp.BB2
.BB1:
...
.BB2:
...

AnalyzeBranch() will reverse "jp .BB1" to "jnp .BB2" and then "jmp .BB2" will be
removed:

jne.BB1
jnp.BB2
.BB1:
...
.BB2:
...

However, AnalyzeBranch() cannot analyze this branch anymore as there are two
conditional jumps with different targets. This may disable some optimizations
like block-placement: in this case the fall-through behavior is enforced even if
the fall-through block is very cold, which is suboptimal.

Actually this optimization is also done in block-placement pass, which means we
can remove this optimization from AnalyzeBranch(). However, currently
X86::COND_NE_OR_P and X86::COND_NP_OR_E are not reversible: there is no defined
negation conditions for them.

In order to reverse them, this patch defines two new CondCode X86::COND_E_AND_NP
and X86::COND_P_AND_NE. It also defines how to synthesize instructions for them.
Here only the second conditional jump is reversed. This is valid as we only need
them to do this "unconditional jump removal" optimization.


Differential Revision: http://reviews.llvm.org/D11393



Modified:
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/lib/Target/X86/X86InstrInfo.h
    llvm/trunk/test/CodeGen/X86/block-placement.ll
    llvm/trunk/test/CodeGen/X86/fast-isel-cmp-branch2.ll
    llvm/trunk/test/CodeGen/X86/fast-isel-cmp-branch3.ll
    llvm/trunk/test/CodeGen/X86/fp-une-cmp.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=258847&r1=258846&r2=258847&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Tue Jan 26 14:08:01 2016
@@ -3805,6 +3805,10 @@ X86::CondCode X86::GetOppositeBranchCond
   case X86::COND_NP: return X86::COND_P;
   case X86::COND_O:  return X86::COND_NO;
   case X86::COND_NO: return X86::COND_O;
+  case X86::COND_NE_OR_P:  return X86::COND_E_AND_NP;
+  case X86::COND_NP_OR_E:  return X86::COND_P_AND_NE;
+  case X86::COND_E_AND_NP: return X86::COND_NE_OR_P;
+  case X86::COND_P_AND_NE: return X86::COND_NP_OR_E;
   }
 }
 
@@ -3998,9 +4002,9 @@ bool X86InstrInfo::AnalyzeBranchImpl(
         MachineBasicBlock::iterator OldInst = I;
 
         BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC))
-          .addMBB(UnCondBrIter->getOperand(0).getMBB());
+            .addMBB(UnCondBrIter->getOperand(0).getMBB());
         BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_1))
-          .addMBB(TargetBB);
+            .addMBB(TargetBB);
 
         OldInst->eraseFromParent();
         UnCondBrIter->eraseFromParent();
@@ -4024,11 +4028,6 @@ bool X86InstrInfo::AnalyzeBranchImpl(
     assert(Cond.size() == 1);
     assert(TBB);
 
-    // Only handle the case where all conditional branches branch to the same
-    // destination.
-    if (TBB != I->getOperand(0).getMBB())
-      return true;
-
     // If the conditions are the same, we can leave them alone.
     X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
     if (OldBranchCode == BranchCode)
@@ -4037,17 +4036,40 @@ bool X86InstrInfo::AnalyzeBranchImpl(
     // If they differ, see if they fit one of the known patterns. Theoretically,
     // we could handle more patterns here, but we shouldn't expect to see them
     // if instruction selection has done a reasonable job.
-    if ((OldBranchCode == X86::COND_NP &&
-         BranchCode == X86::COND_E) ||
-        (OldBranchCode == X86::COND_E &&
-         BranchCode == X86::COND_NP))
+    auto NewTBB = I->getOperand(0).getMBB();
+    if (TBB == NewTBB &&
+        ((OldBranchCode == X86::COND_NP && BranchCode == X86::COND_E) ||
+         (OldBranchCode == X86::COND_E && BranchCode == X86::COND_NP))) {
       BranchCode = X86::COND_NP_OR_E;
-    else if ((OldBranchCode == X86::COND_P &&
-              BranchCode == X86::COND_NE) ||
-             (OldBranchCode == X86::COND_NE &&
-              BranchCode == X86::COND_P))
+    } else if (TBB == NewTBB &&
+               ((OldBranchCode == X86::COND_P && BranchCode == X86::COND_NE) ||
+                (OldBranchCode == X86::COND_NE && BranchCode == X86::COND_P))) {
       BranchCode = X86::COND_NE_OR_P;
-    else
+    } else if ((OldBranchCode == X86::COND_NE && BranchCode == X86::COND_NP) ||
+               (OldBranchCode == X86::COND_P && BranchCode == X86::COND_E)) {
+      // X86::COND_P_AND_NE usually has two different branch destinations.
+      //
+      // JNP B1
+      // JNE B2
+      // B1: (fall-through)
+      // B2:
+      //
+      // Here this condition branches to B2 only if P && NE. It has another
+      // equivalent form:
+      //
+      // JE B1
+      // JP B2
+      // B1: (fall-through)
+      // B2:
+      //
+      // Similarly it branches to B2 only if NE && P. That is why this condition
+      // is named COND_P_AND_NE.
+      BranchCode = X86::COND_P_AND_NE;
+    } else if ((OldBranchCode == X86::COND_NP && BranchCode == X86::COND_NE) ||
+               (OldBranchCode == X86::COND_E && BranchCode == X86::COND_P)) {
+      // See comments above for X86::COND_P_AND_NE.
+      BranchCode = X86::COND_E_AND_NP;
+    } else
       return true;
 
     // Update the MachineOperand.
@@ -4156,6 +4178,13 @@ unsigned X86InstrInfo::RemoveBranch(Mach
   return Count;
 }
 
+static MachineBasicBlock *getFallThroughMBB(MachineBasicBlock *MBB) {
+  auto I = std::next(MBB->getIterator());
+  if (I == MBB->getParent()->end())
+    return nullptr;
+  return &*I;
+}
+
 unsigned
 X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
@@ -4172,6 +4201,9 @@ X86InstrInfo::InsertBranch(MachineBasicB
     return 1;
   }
 
+  // If FBB is null, it is implied to be a fall-through block.
+  bool FallThru = FBB == nullptr;
+
   // Conditional branch.
   unsigned Count = 0;
   X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
@@ -4190,13 +4222,39 @@ X86InstrInfo::InsertBranch(MachineBasicB
     BuildMI(&MBB, DL, get(X86::JP_1)).addMBB(TBB);
     ++Count;
     break;
+  case X86::COND_P_AND_NE:
+    // Use the next block of MBB as FBB if it is null.
+    if (FBB == nullptr) {
+      FBB = getFallThroughMBB(&MBB);
+      assert(FBB && "MBB cannot be the last block in function when the false "
+                    "body is a fall-through.");
+    }
+    // Synthesize NEG_NP_OR_E with two branches.
+    BuildMI(&MBB, DL, get(X86::JNP_1)).addMBB(FBB);
+    ++Count;
+    BuildMI(&MBB, DL, get(X86::JNE_1)).addMBB(TBB);
+    ++Count;
+    break;
+  case X86::COND_E_AND_NP:
+    // Use the next block of MBB as FBB if it is null.
+    if (FBB == nullptr) {
+      FBB = getFallThroughMBB(&MBB);
+      assert(FBB && "MBB cannot be the last block in function when the false "
+                    "body is a fall-through.");
+    }
+    // Synthesize NEG_NE_OR_P with two branches.
+    BuildMI(&MBB, DL, get(X86::JNE_1)).addMBB(FBB);
+    ++Count;
+    BuildMI(&MBB, DL, get(X86::JNP_1)).addMBB(TBB);
+    ++Count;
+    break;
   default: {
     unsigned Opc = GetCondBranchFromCond(CC);
     BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
     ++Count;
   }
   }
-  if (FBB) {
+  if (!FallThru) {
     // Two-way Conditional branch. Insert the second branch.
     BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(FBB);
     ++Count;
@@ -6717,8 +6775,6 @@ bool X86InstrInfo::
 ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
   assert(Cond.size() == 1 && "Invalid X86 branch condition!");
   X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm());
-  if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E)
-    return true;
   Cond[0].setImm(GetOppositeBranchCondition(CC));
   return false;
 }

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.h?rev=258847&r1=258846&r2=258847&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.h Tue Jan 26 14:08:01 2016
@@ -29,35 +29,44 @@ namespace llvm {
 namespace X86 {
   // X86 specific condition code. These correspond to X86_*_COND in
   // X86InstrInfo.td. They must be kept in synch.
-  enum CondCode {
-    COND_A  = 0,
-    COND_AE = 1,
-    COND_B  = 2,
-    COND_BE = 3,
-    COND_E  = 4,
-    COND_G  = 5,
-    COND_GE = 6,
-    COND_L  = 7,
-    COND_LE = 8,
-    COND_NE = 9,
-    COND_NO = 10,
-    COND_NP = 11,
-    COND_NS = 12,
-    COND_O  = 13,
-    COND_P  = 14,
-    COND_S  = 15,
-    LAST_VALID_COND = COND_S,
+enum CondCode {
+  COND_A  = 0,
+  COND_AE = 1,
+  COND_B  = 2,
+  COND_BE = 3,
+  COND_E  = 4,
+  COND_G  = 5,
+  COND_GE = 6,
+  COND_L  = 7,
+  COND_LE = 8,
+  COND_NE = 9,
+  COND_NO = 10,
+  COND_NP = 11,
+  COND_NS = 12,
+  COND_O  = 13,
+  COND_P  = 14,
+  COND_S  = 15,
+  LAST_VALID_COND = COND_S,
 
-    // Artificial condition codes. These are used by AnalyzeBranch
-    // to indicate a block terminated with two conditional branches to
-    // the same location. This occurs in code using FCMP_OEQ or FCMP_UNE,
-    // which can't be represented on x86 with a single condition. These
-    // are never used in MachineInstrs.
-    COND_NE_OR_P,
-    COND_NP_OR_E,
+  // Artificial condition codes. These are used by AnalyzeBranch
+  // to indicate a block terminated with two conditional branches to
+  // the same location. This occurs in code using FCMP_OEQ or FCMP_UNE,
+  // which can't be represented on x86 with a single condition. These
+  // are never used in MachineInstrs.
+  COND_NE_OR_P,
+  COND_NP_OR_E,
 
-    COND_INVALID
-  };
+  // Artificial condition codes. These are used to represent the negation of
+  // above two conditions. The only scenario we need these two conditions is
+  // when we try to reverse above two conditions in order to remove redundant
+  // unconditional jumps. Note that both true and false bodies need to be
+  // avaiable in order to correctly synthesize instructions for them. These are
+  // never used in MachineInstrs.
+  COND_E_AND_NP, // negate of COND_NE_OR_P
+  COND_P_AND_NE, // negate of COND_NP_OR_E
+
+  COND_INVALID
+};
 
   // Turn condition code into conditional branch opcode.
   unsigned GetCondBranchFromCond(CondCode CC);

Modified: llvm/trunk/test/CodeGen/X86/block-placement.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/block-placement.ll?rev=258847&r1=258846&r2=258847&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/block-placement.ll (original)
+++ llvm/trunk/test/CodeGen/X86/block-placement.ll Tue Jan 26 14:08:01 2016
@@ -463,26 +463,23 @@ exit:
 }
 
 define void @fpcmp_unanalyzable_branch(i1 %cond) {
-; This function's CFG contains an unanalyzable branch that is likely to be
-; split due to having a different high-probability predecessor.
+; This function's CFG contains an once-unanalyzable branch (une on floating
+; points). As now it becomes analyzable, we should get best layout in which each
+; edge in 'entry' -> 'entry.if.then_crit_edge' -> 'if.then' -> 'if.end' is
+; fall-through.
 ; CHECK: fpcmp_unanalyzable_branch
 ; CHECK: %entry
+; CHECK: %entry.if.then_crit_edge
+; CHECK: %if.then
+; CHECK: %if.end
 ; CHECK: %exit
-; CHECK-NOT: %if.then
-; CHECK-NOT: %if.end
-; CHECK-NOT: jne
-; CHECK-NOT: jnp
 ; CHECK: jne
 ; CHECK-NEXT: jnp
-; CHECK-NEXT: %if.then
 
 entry:
 ; Note that this branch must be strongly biased toward
 ; 'entry.if.then_crit_edge' to ensure that we would try to form a chain for
-; 'entry' -> 'entry.if.then_crit_edge' -> 'if.then'. It is the last edge in that
-; chain which would violate the unanalyzable branch in 'exit', but we won't even
-; try this trick unless 'if.then' is believed to almost always be reached from
-; 'entry.if.then_crit_edge'.
+; 'entry' -> 'entry.if.then_crit_edge' -> 'if.then' -> 'if.end'.
   br i1 %cond, label %entry.if.then_crit_edge, label %lor.lhs.false, !prof !1
 
 entry.if.then_crit_edge:
@@ -494,7 +491,7 @@ lor.lhs.false:
 
 exit:
   %cmp.i = fcmp une double 0.000000e+00, undef
-  br i1 %cmp.i, label %if.then, label %if.end
+  br i1 %cmp.i, label %if.then, label %if.end, !prof !3
 
 if.then:
   %0 = phi i8 [ %.pre14, %entry.if.then_crit_edge ], [ undef, %exit ]
@@ -507,6 +504,7 @@ if.end:
 }
 
 !1 = !{!"branch_weights", i32 1000, i32 1}
+!3 = !{!"branch_weights", i32 1, i32 1000}
 
 declare i32 @f()
 declare i32 @g()
@@ -665,11 +663,14 @@ define void @unanalyzable_branch_to_best
 ; Ensure that we can handle unanalyzable branches where the destination block
 ; gets selected as the optimal successor to merge.
 ;
+; This branch is now analyzable and hence the destination block becomes the
+; hotter one. The right order is entry->bar->exit->foo.
+;
 ; CHECK: unanalyzable_branch_to_best_succ
 ; CHECK: %entry
-; CHECK: %foo
 ; CHECK: %bar
 ; CHECK: %exit
+; CHECK: %foo
 
 entry:
   ; Bias this branch toward bar to ensure we form that chain.

Modified: llvm/trunk/test/CodeGen/X86/fast-isel-cmp-branch2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-cmp-branch2.ll?rev=258847&r1=258846&r2=258847&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-cmp-branch2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-cmp-branch2.ll Tue Jan 26 14:08:01 2016
@@ -5,7 +5,7 @@ define i32 @fcmp_oeq(float %x, float %y)
 ; CHECK-LABEL: fcmp_oeq
 ; CHECK:       ucomiss  %xmm1, %xmm0
 ; CHECK-NEXT:  jne {{LBB.+_1}}
-; CHECK-NEXT:  jnp {{LBB.+_2}}
+; CHECK-NEXT:  jp {{LBB.+_1}}
   %1 = fcmp oeq float %x, %y
   br i1 %1, label %bb1, label %bb2
 bb2:
@@ -162,8 +162,7 @@ define i32 @fcmp_une(float %x, float %y)
 ; CHECK-LABEL: fcmp_une
 ; CHECK:       ucomiss  %xmm1, %xmm0
 ; CHECK-NEXT:  jne {{LBB.+_2}}
-; CHECK-NEXT:  jp  {{LBB.+_2}}
-; CHECK-NEXT:  jmp {{LBB.+_1}}
+; CHECK-NEXT:  jnp {{LBB.+_1}}
   %1 = fcmp une float %x, %y
   br i1 %1, label %bb1, label %bb2
 bb2:

Modified: llvm/trunk/test/CodeGen/X86/fast-isel-cmp-branch3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-cmp-branch3.ll?rev=258847&r1=258846&r2=258847&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-cmp-branch3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-cmp-branch3.ll Tue Jan 26 14:08:01 2016
@@ -17,7 +17,7 @@ define i32 @fcmp_oeq2(float %x) {
 ; CHECK:       xorps    %xmm1, %xmm1
 ; CHECK-NEXT:  ucomiss  %xmm1, %xmm0
 ; CHECK-NEXT:  jne {{LBB.+_1}}
-; CHECK-NEXT:  jnp {{LBB.+_2}}
+; CHECK-NEXT:  jp {{LBB.+_1}}
   %1 = fcmp oeq float %x, 0.000000e+00
   br i1 %1, label %bb1, label %bb2
 bb2:
@@ -338,8 +338,7 @@ define i32 @fcmp_une2(float %x) {
 ; CHECK:       xorps    %xmm1, %xmm1
 ; CHECK-NEXT:  ucomiss  %xmm1, %xmm0
 ; CHECK-NEXT:  jne {{LBB.+_2}}
-; CHECK-NEXT:  jp {{LBB.+_2}}
-; CHECK-NEXT:  jmp {{LBB.+_1}}
+; CHECK-NEXT:  jnp {{LBB.+_1}}
   %1 = fcmp une float %x, 0.000000e+00
   br i1 %1, label %bb1, label %bb2
 bb2:

Modified: llvm/trunk/test/CodeGen/X86/fp-une-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp-une-cmp.ll?rev=258847&r1=258846&r2=258847&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fp-une-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fp-une-cmp.ll Tue Jan 26 14:08:01 2016
@@ -19,12 +19,12 @@
 ;       addsd ...
 ;   LBB0_2:
 
-; CHECK:       func
+define float @func1(float %x, float %y) nounwind readnone optsize ssp {
+; CHECK:       func1
 ; CHECK:       jne [[LABEL:.*]]
 ; CHECK-NEXT:  jp  [[LABEL]]
 ; CHECK-NOT:   jmp
-
-define float @func(float %x, float %y) nounwind readnone optsize ssp {
+;
 entry:
   %0 = fpext float %x to double
   %1 = fpext float %y to double
@@ -41,3 +41,30 @@ bb2:
   %.0 = fptrunc double %.0.in to float
   ret float %.0
 }
+
+define float @func2(float %x, float %y) nounwind readnone optsize ssp {
+; CHECK:       func2
+; CHECK:       jne [[LABEL:.*]]
+; CHECK-NEXT:  jp  [[LABEL]]
+; CHECK:       %bb2
+; CHECK:       %bb1
+; CHECK:       jmp
+;
+entry:
+  %0 = fpext float %x to double
+  %1 = fpext float %y to double
+  %2 = fmul double %0, %1
+  %3 = fcmp une double %2, 0.000000e+00
+  br i1 %3, label %bb1, label %bb2, !prof !1
+
+bb1:
+  %4 = fadd double %2, -1.000000e+00
+  br label %bb2
+
+bb2:
+  %.0.in = phi double [ %4, %bb1 ], [ %2, %entry ]
+  %.0 = fptrunc double %.0.in to float
+  ret float %.0
+}
+
+!1 = !{!"branch_weights", i32 1, i32 1000}




More information about the llvm-commits mailing list