[llvm] r321528 - Revert r321377, it causes regression to https://reviews.llvm.org/P8055.

Guozhi Wei via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 28 09:02:34 PST 2017


Author: carrot
Date: Thu Dec 28 09:02:34 2017
New Revision: 321528

URL: http://llvm.org/viewvc/llvm-project?rev=321528&view=rev
Log:
Revert r321377, it causes regression to https://reviews.llvm.org/P8055.


Removed:
    llvm/trunk/test/Transforms/SimplifyCFG/X86/if-conversion.ll
Modified:
    llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
    llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
    llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
    llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
    llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp

Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h?rev=321528&r1=321527&r2=321528&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h Thu Dec 28 09:02:34 2017
@@ -646,9 +646,6 @@ public:
   /// \brief Additional properties of an operand's values.
   enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
 
-  /// \return True if target can execute instructions out of order.
-  bool isOutOfOrder() const;
-
   /// \return The number of scalar or vector registers that the target has.
   /// If 'Vectors' is true, it returns the number of vector registers. If it is
   /// set to false, it returns the number of scalar registers.
@@ -1021,7 +1018,6 @@ public:
                             Type *Ty) = 0;
   virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                             Type *Ty) = 0;
-  virtual bool isOutOfOrder() const = 0;
   virtual unsigned getNumberOfRegisters(bool Vector) = 0;
   virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
   virtual unsigned getMinVectorRegisterBitWidth() = 0;
@@ -1299,9 +1295,6 @@ public:
                     Type *Ty) override {
     return Impl.getIntImmCost(IID, Idx, Imm, Ty);
   }
-  bool isOutOfOrder() const override {
-    return Impl.isOutOfOrder();
-  }
   unsigned getNumberOfRegisters(bool Vector) override {
     return Impl.getNumberOfRegisters(Vector);
   }

Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h?rev=321528&r1=321527&r2=321528&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h Thu Dec 28 09:02:34 2017
@@ -337,8 +337,6 @@ public:
     return TTI::TCC_Free;
   }
 
-  bool isOutOfOrder() const { return false; }
-
   unsigned getNumberOfRegisters(bool Vector) { return 8; }
 
   unsigned getRegisterBitWidth(bool Vector) const { return 32; }

Modified: llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h?rev=321528&r1=321527&r2=321528&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h (original)
+++ llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h Thu Dec 28 09:02:34 2017
@@ -402,10 +402,6 @@ public:
     return BaseT::getInstructionLatency(I);
   }
 
-  bool isOutOfOrder() const {
-    return getST()->getSchedModel().isOutOfOrder();
-  }
-
   /// @}
 
   /// \name Vector TTI Implementations

Modified: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/TargetTransformInfo.cpp?rev=321528&r1=321527&r2=321528&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp Thu Dec 28 09:02:34 2017
@@ -314,10 +314,6 @@ int TargetTransformInfo::getIntImmCost(I
   return Cost;
 }
 
-bool TargetTransformInfo::isOutOfOrder() const {
-  return TTIImpl->isOutOfOrder();
-}
-
 unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
   return TTIImpl->getNumberOfRegisters(Vector);
 }

Modified: llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp?rev=321528&r1=321527&r2=321528&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp Thu Dec 28 09:02:34 2017
@@ -127,16 +127,6 @@ static cl::opt<unsigned> MaxSpeculationD
     cl::desc("Limit maximum recursion depth when calculating costs of "
              "speculatively executed instructions"));
 
-static cl::opt<unsigned> DependenceChainLatency(
-    "dependence-chain-latency", cl::Hidden, cl::init(8),
-    cl::desc("Limit the maximum latency of dependence chain containing cmp "
-             "for if conversion"));
-
-static cl::opt<unsigned> SmallBBSize(
-    "small-bb-size", cl::Hidden, cl::init(40),
-    cl::desc("Check dependence chain latency only in basic block smaller than "
-             "this number"));
-
 STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
 STATISTIC(NumLinearMaps,
           "Number of switch instructions turned into linear mapping");
@@ -405,166 +395,6 @@ static bool DominatesMergePoint(Value *V
   return true;
 }
 
-/// Estimate the code size of the specified BB.
-static unsigned CountBBCodeSize(BasicBlock *BB,
-                                const TargetTransformInfo &TTI) {
-  unsigned Size = 0;
-  for (auto II = BB->begin(); !isa<TerminatorInst>(II); ++II)
-    Size += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_CodeSize);
-  return Size;
-}
-
-/// Find out the latency of the longest dependence chain in the BB if
-/// LongestChain is true, or the dependence chain containing the compare
-/// instruction feeding the block's conditional branch.
-static unsigned FindDependenceChainLatency(BasicBlock *BB,
-                            DenseMap<Instruction *, unsigned> &Instructions,
-                            const TargetTransformInfo &TTI,
-                            bool LongestChain) {
-  unsigned MaxLatency = 0;
-
-  BasicBlock::iterator II;
-  for (II = BB->begin(); !isa<TerminatorInst>(II); ++II) {
-    unsigned Latency = 0;
-    for (unsigned O = 0, E = II->getNumOperands(); O != E; ++O) {
-      Instruction *Op = dyn_cast<Instruction>(II->getOperand(O));
-      if (Op && Instructions.count(Op)) {
-        auto OpLatency = Instructions[Op];
-        if (OpLatency > Latency)
-          Latency = OpLatency;
-      }
-    }
-    Latency += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_Latency);
-    Instructions[&(*II)] = Latency;
-
-    if (Latency > MaxLatency)
-      MaxLatency = Latency;
-  }
-
-  if (LongestChain)
-    return MaxLatency;
-
-  // The length of the dependence chain containing the compare instruction is
-  // wanted, so the terminator must be a BranchInst.
-  assert(isa<BranchInst>(II));
-  BranchInst* Br = cast<BranchInst>(II);
-  Instruction *Cmp = dyn_cast<Instruction>(Br->getCondition());
-  if (Cmp && Instructions.count(Cmp))
-    return Instructions[Cmp];
-  else
-    return 0;
-}
-
-/// Instructions in BB2 may depend on instructions in BB1, and instructions
-/// in BB1 may have users in BB2. If the last (in terms of latency) such kind
-/// of instruction in BB1 is I, then the instructions after I can be executed
-/// in parallel with instructions in BB2.
-/// This function returns the latency of I.
-static unsigned LatencyAdjustment(BasicBlock *BB1, BasicBlock *BB2,
-                        BasicBlock *IfBlock1, BasicBlock *IfBlock2,
-                        DenseMap<Instruction *, unsigned> &BB1Instructions) {
-  unsigned LastLatency = 0;
-  SmallVector<Instruction *, 16> Worklist;
-  BasicBlock::iterator II;
-  for (II = BB2->begin(); !isa<TerminatorInst>(II); ++II) {
-    if (PHINode *PN = dyn_cast<PHINode>(II)) {
-      // Look for users in BB2.
-      bool InBBUser = false;
-      for (User *U : PN->users()) {
-        if (cast<Instruction>(U)->getParent() == BB2) {
-          InBBUser = true;
-          break;
-        }
-      }
-      // No such user, we don't care about this instruction and its operands.
-      if (!InBBUser)
-        break;
-    }
-    Worklist.push_back(&(*II));
-  }
-
-  while (!Worklist.empty()) {
-    Instruction *I = Worklist.pop_back_val();
-    for (unsigned O = 0, E = I->getNumOperands(); O != E; ++O) {
-      if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(O))) {
-        if (Op->getParent() == IfBlock1 || Op->getParent() == IfBlock2)
-          Worklist.push_back(Op);
-        else if (Op->getParent() == BB1 && BB1Instructions.count(Op)) {
-          if (BB1Instructions[Op] > LastLatency)
-            LastLatency = BB1Instructions[Op];
-        }
-      }
-    }
-  }
-
-  return LastLatency;
-}
-
-/// If after if conversion, most of the instructions in this new BB construct a
-/// long and slow dependence chain, it may be slower than cmp/branch, even
-/// if the branch has a high miss rate, because the control dependence is
-/// transformed into data dependence, and control dependence can be speculated,
-/// and thus, the second part can execute in parallel with the first part on
-/// modern OOO processor.
-///
-/// To check this condition, this function finds the length of the dependence
-/// chain in BB1 (only the part that can be executed in parallel with code after
-/// branch in BB2) containing cmp, and if the length is longer than a threshold,
-/// don't perform if conversion.
-///
-/// BB1, BB2, IfBlock1 and IfBlock2 are candidate BBs for if conversion.
-/// SpeculationSize contains the code size of IfBlock1 and IfBlock2.
-static bool FindLongDependenceChain(BasicBlock *BB1, BasicBlock *BB2,
-                             BasicBlock *IfBlock1, BasicBlock *IfBlock2,
-                             unsigned SpeculationSize,
-                             const TargetTransformInfo &TTI) {
-  // Accumulated latency of each instruction in their BBs.
-  DenseMap<Instruction *, unsigned> BB1Instructions;
-  DenseMap<Instruction *, unsigned> BB2Instructions;
-
-  if (!TTI.isOutOfOrder())
-    return false;
-
-  unsigned NewBBSize = CountBBCodeSize(BB1, TTI) + CountBBCodeSize(BB2, TTI)
-                         + SpeculationSize;
-
-  // We check small BB only since it is more difficult to find unrelated
-  // instructions to fill functional units in a small BB.
-  if (NewBBSize > SmallBBSize)
-    return false;
-
-  auto BB1Chain =
-         FindDependenceChainLatency(BB1, BB1Instructions, TTI, false);
-  auto BB2Chain =
-         FindDependenceChainLatency(BB2, BB2Instructions, TTI, true);
-
-  // If there are many unrelated instructions in the new BB, there will be
-  // other instructions for the processor to issue regardless of the length
-  // of this new dependence chain.
-  // Modern processors can issue 3 or more instructions in each cycle. But in
-  // real world applications, an IPC of 2 is already very good for non-loop
-  // code with small basic blocks. Higher IPC is usually found in programs with
-  // small kernel. So IPC of 2 is more reasonable for most applications.
-  if ((BB1Chain + BB2Chain) * 2 <= NewBBSize)
-    return false;
-
-  // We only care about part of the dependence chain in BB1 that can be
-  // executed in parallel with BB2, so adjust the latency.
-  BB1Chain -=
-      LatencyAdjustment(BB1, BB2, IfBlock1, IfBlock2, BB1Instructions);
-
-  // Correctly predicted branch instruction can skip the dependence chain in
-  // BB1, but misprediction has a penalty, so only when the dependence chain is
-  // longer than DependenceChainLatency, then branch is better than select.
-  // Besides misprediction penalty, the threshold value DependenceChainLatency
-  // also depends on branch misprediction rate, taken branch latency and cmov
-  // latency.
-  if (BB1Chain >= DependenceChainLatency)
-    return true;
-
-  return false;
-}
-
 /// Extract ConstantInt from value, looking through IntToPtr
 /// and PointerNullValue. Return NULL if value is not a constant int.
 static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
@@ -2214,11 +2044,6 @@ static bool SpeculativelyExecuteBB(Branc
   if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue))
     return false;
 
-  // Don't do if conversion for long dependence chain.
-  if (FindLongDependenceChain(BB, EndBB, ThenBB, nullptr,
-                              CountBBCodeSize(ThenBB, TTI), TTI))
-    return false;
-
   // If we get here, we can hoist the instruction and if-convert.
   DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
 
@@ -2526,10 +2351,6 @@ static bool FoldTwoEntryPHINode(PHINode
       }
   }
 
-  if (FindLongDependenceChain(DomBlock, BB, IfBlock1, IfBlock2,
-                              AggressiveInsts.size(), TTI))
-    return false;
-
   DEBUG(dbgs() << "FOUND IF CONDITION!  " << *IfCond << "  T: "
                << IfTrue->getName() << "  F: " << IfFalse->getName() << "\n");
 

Removed: llvm/trunk/test/Transforms/SimplifyCFG/X86/if-conversion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/X86/if-conversion.ll?rev=321527&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/X86/if-conversion.ll (original)
+++ llvm/trunk/test/Transforms/SimplifyCFG/X86/if-conversion.ll (removed)
@@ -1,231 +0,0 @@
-; RUN: opt < %s -simplifycfg -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -S | FileCheck %s
-; Avoid if-conversion if there is a long dependence chain.
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-
-; The first several cases test FindLongDependenceChain returns true, so
-; if-conversion is blocked.
-
-define i64 @test1(i64** %pp, i64* %p) {
-entry:
-  %0 = load i64*, i64** %pp, align 8
-  %1 = load i64, i64* %0, align 8
-  %cmp = icmp slt i64 %1, 0
-  %pint = ptrtoint i64* %p to i64
-  br i1 %cmp, label %cond.true, label %cond.false
-
-cond.true:
-  %p1 = add i64 %pint, 8
-  br label %cond.end
-
-cond.false:
-  %p2 = or i64 %pint, 16
-  br label %cond.end
-
-cond.end:
-  %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
-  %ptr = inttoptr i64 %p3 to i64*
-  %val = load i64, i64* %ptr, align 8
-  ret i64 %val
-
-; CHECK-NOT: select
-}
-
-define i64 @test2(i64** %pp, i64* %p) {
-entry:
-  %0 = load i64*, i64** %pp, align 8
-  %1 = load i64, i64* %0, align 8
-  %cmp = icmp slt i64 %1, 0
-  %pint = ptrtoint i64* %p to i64
-  br i1 %cmp, label %cond.true, label %cond.false
-
-cond.true:
-  %p1 = add i64 %pint, 8
-  br label %cond.end
-
-cond.false:
-  %p2 = add i64 %pint, 16
-  br label %cond.end
-
-cond.end:
-  %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
-  %ptr = inttoptr i64 %p3 to i64*
-  %val = load i64, i64* %ptr, align 8
-  ret i64 %val
-
-; CHECK-LABEL: @test2
-; CHECK-NOT: select
-}
-
-; The following cases test FindLongDependenceChain returns false, so
-; if-conversion will proceed.
-
-; Non trivial LatencyAdjustment.
-define i64 @test3(i64** %pp, i64* %p) {
-entry:
-  %0 = load i64*, i64** %pp, align 8
-  %1 = load i64, i64* %0, align 8
-  %cmp = icmp slt i64 %1, 0
-  %pint = ptrtoint i64* %p to i64
-  br i1 %cmp, label %cond.true, label %cond.false
-
-cond.true:
-  %p1 = add i64 %pint, 8
-  br label %cond.end
-
-cond.false:
-  %p2 = or i64 %pint, 16
-  br label %cond.end
-
-cond.end:
-  %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
-  %p4 = add i64 %p3, %1
-  %ptr = inttoptr i64 %p4 to i64*
-  %val = load i64, i64* %ptr, align 8
-  ret i64 %val
-
-; CHECK-LABEL: @test3
-; CHECK: select
-}
-
-; Short dependence chain.
-define i64 @test4(i64* %pp, i64* %p) {
-entry:
-  %0 = load i64, i64* %pp, align 8
-  %cmp = icmp slt i64 %0, 0
-  %pint = ptrtoint i64* %p to i64
-  br i1 %cmp, label %cond.true, label %cond.false
-
-cond.true:
-  %p1 = add i64 %pint, 8
-  br label %cond.end
-
-cond.false:
-  %p2 = or i64 %pint, 16
-  br label %cond.end
-
-cond.end:
-  %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
-  %ptr = inttoptr i64 %p3 to i64*
-  %val = load i64, i64* %ptr, align 8
-  ret i64 %val
-
-; CHECK-LABEL: @test4
-; CHECK: select
-}
-
-; High IPC.
-define i64 @test5(i64** %pp, i64* %p) {
-entry:
-  %0 = load i64*, i64** %pp, align 8
-  %1 = load i64, i64* %0, align 8
-  %cmp = icmp slt i64 %1, 0
-  %pint = ptrtoint i64* %p to i64
-  %2 = add i64 %pint, 2
-  %3 = add i64 %pint, 3
-  %4 = or i64 %pint, 16
-  %5 = and i64 %pint, 255
-
-  %6 = or i64 %2, 9
-  %7 = and i64 %3, 255
-  %8 = add i64 %4, 4
-  %9 = add i64 %5, 5
-
-  %10 = add i64 %6, 2
-  %11 = add i64 %7, 3
-  %12 = add i64 %8, 4
-  %13 = add i64 %9, 5
-
-  %14 = add i64 %10, 6
-  %15 = add i64 %11, 7
-  %16 = add i64 %12, 8
-  %17 = add i64 %13, 9
-
-  %18 = add i64 %14, 10
-  %19 = add i64 %15, 11
-  %20 = add i64 %16, 12
-  %21 = add i64 %17, 13
-
-  br i1 %cmp, label %cond.true, label %cond.false
-
-cond.true:
-  %p1 = add i64 %pint, 8
-  br label %cond.end
-
-cond.false:
-  %p2 = or i64 %pint, 16
-  br label %cond.end
-
-cond.end:
-  %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
-  %ptr = inttoptr i64 %p3 to i64*
-  %val = load i64, i64* %ptr, align 8
-
-  ret i64 %val
-
-; CHECK-LABEL: @test5
-; CHECK: select
-}
-
-; Large BB size.
-define i64 @test6(i64** %pp, i64* %p) {
-entry:
-  %0 = load i64*, i64** %pp, align 8
-  %1 = load i64, i64* %0, align 8
-  %cmp = icmp slt i64 %1, 0
-  %pint = ptrtoint i64* %p to i64
-  br i1 %cmp, label %cond.true, label %cond.false
-
-cond.true:
-  %p1 = add i64 %pint, 8
-  br label %cond.end
-
-cond.false:
-  %p2 = or i64 %pint, 16
-  br label %cond.end
-
-cond.end:
-  %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false]
-  %ptr = inttoptr i64 %p3 to i64*
-  %val = load i64, i64* %ptr, align 8
-  %2 = add i64 %pint, 2
-  %3 = add i64 %pint, 3
-  %4 = add i64 %2, 4
-  %5 = add i64 %3, 5
-  %6 = add i64 %4, 6
-  %7 = add i64 %5, 7
-  %8 = add i64 %6, 6
-  %9 = add i64 %7, 7
-  %10 = add i64 %8, 6
-  %11 = add i64 %9, 7
-  %12 = add i64 %10, 6
-  %13 = add i64 %11, 7
-  %14 = add i64 %12, 6
-  %15 = add i64 %13, 7
-  %16 = add i64 %14, 6
-  %17 = add i64 %15, 7
-  %18 = add i64 %16, 6
-  %19 = add i64 %17, 7
-  %20 = add i64 %18, 6
-  %21 = add i64 %19, 7
-  %22 = add i64 %20, 6
-  %23 = add i64 %21, 7
-  %24 = add i64 %22, 6
-  %25 = add i64 %23, 7
-  %26 = add i64 %24, 6
-  %27 = add i64 %25, 7
-  %28 = add i64 %26, 6
-  %29 = add i64 %27, 7
-  %30 = add i64 %28, 6
-  %31 = add i64 %29, 7
-  %32 = add i64 %30, 8
-  %33 = add i64 %31, 9
-  %34 = add i64 %32, %33
-  %35 = and i64 %34, 255
-  %res = add i64 %val, %35
-
-  ret i64 %res
-
-; CHECK-LABEL: @test6
-; CHECK: select
-}




More information about the llvm-commits mailing list