[llvm] r275382 - This implements a more optimal algorithm for selecting a base constant in

Thu Jul 14 00:44:21 PDT 2016

Author: sjoerdmeijer
Date: Thu Jul 14 02:44:20 2016
New Revision: 275382

URL: http://llvm.org/viewvc/llvm-project?rev=275382&view=rev
Log:
This implements a more optimal algorithm for selecting a base constant in
constant hoisting. It not only takes into account the number of uses and the
cost of expressions in which constants appear, but now also the resulting
integer range of the offsets. Thus, the algorithm maximizes the number of uses
within an integer range that will enable more efficient code generation. On
ARM, for example, this will enable code size optimisations because less
negative offsets will be created. Negative offsets/immediates are not supported
by Thumb1 thus preventing more compact instruction encoding.

Differential Revision: http://reviews.llvm.org/D21183


Added:
    llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll
Modified:
    llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
    llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
    llvm/trunk/include/llvm/Transforms/Scalar/ConstantHoisting.h
    llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
    llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp
    llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.h
    llvm/trunk/lib/Transforms/Scalar/ConstantHoisting.cpp

Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h?rev=275382&r1=275381&r2=275382&view=diff
==============================================================================

--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h Thu Jul 14 02:44:20 2016
@@ -414,6 +414,16 @@ public:
                     Type *Ty) const;
   int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                     Type *Ty) const;
+
+  /// \brief Return the expected cost for the given integer when optimising
+  /// for size. This is different than the other integer immediate cost
+  /// functions in that it is subtarget agnostic. This is useful when you e.g.
+  /// target one ISA such as Aarch32 but smaller encodings could be possible
+  /// with another such as Thumb. This return value is used as a penalty when
+  /// the total costs for a constant is calculated (the bigger the cost, the
+  /// more beneficial constant hoisting is).
+  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
+                            Type *Ty) const;
   /// @}
 
   /// \name Vector Target Information
@@ -665,6 +675,8 @@ public:
   virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
   virtual bool haveFastSqrt(Type *Ty) = 0;
   virtual int getFPOpCost(Type *Ty) = 0;
+  virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
+                                    Type *Ty) = 0;
   virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
   virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                             Type *Ty) = 0;
@@ -841,6 +853,10 @@ public:
 
   int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
 
+  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
+                            Type *Ty) override {
+    return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
+  }
   int getIntImmCost(const APInt &Imm, Type *Ty) override {
     return Impl.getIntImmCost(Imm, Ty);
   }

Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h?rev=275382&r1=275381&r2=275382&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h Thu Jul 14 02:44:20 2016
@@ -257,6 +257,11 @@ public:
 
   unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }
 
+  int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+                            Type *Ty) {
+    return 0;
+  }
+
   unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }
 
   unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,

Modified: llvm/trunk/include/llvm/Transforms/Scalar/ConstantHoisting.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Scalar/ConstantHoisting.h?rev=275382&r1=275381&r2=275382&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Scalar/ConstantHoisting.h (original)
+++ llvm/trunk/include/llvm/Transforms/Scalar/ConstantHoisting.h Thu Jul 14 02:44:20 2016
@@ -134,6 +134,9 @@ private:
   void collectConstantCandidates(Function &Fn);
   void findAndMakeBaseConstant(ConstCandVecType::iterator S,
                                ConstCandVecType::iterator E);
+  unsigned maximizeConstantsInRange(ConstCandVecType::iterator S,
+                                    ConstCandVecType::iterator E,
+                                    ConstCandVecType::iterator &MaxCostItr);
   void findBaseConstants();
   void emitBaseConstants(Instruction *Base, Constant *Offset,
                          const consthoist::ConstantUser &ConstUser);

Modified: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/TargetTransformInfo.cpp?rev=275382&r1=275381&r2=275382&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp Thu Jul 14 02:44:20 2016
@@ -209,6 +209,14 @@ int TargetTransformInfo::getFPOpCost(Typ
   return Cost;
 }
 
+int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
+                                               const APInt &Imm,
+                                               Type *Ty) const {
+  int Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
 int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
   int Cost = TTIImpl->getIntImmCost(Imm, Ty);
   assert(Cost >= 0 && "TTI should not produce negative costs!");

Modified: llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp?rev=275382&r1=275381&r2=275382&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp Thu Jul 14 02:44:20 2016
@@ -47,6 +47,17 @@ int ARMTTIImpl::getIntImmCost(const APIn
   return 3;
 }
 
+
+// Constants smaller than 256 fit in the immediate field of
+// Thumb1 instructions so we return a zero cost and 1 otherwise.
+int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
+                                      const APInt &Imm, Type *Ty) {
+  if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
+    return 0;
+
+  return 1;
+}
+
 int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                               Type *Ty) {
   // Division by a constant can be turned into multiplication, but only if we

Modified: llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.h?rev=275382&r1=275381&r2=275382&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.h Thu Jul 14 02:44:20 2016
@@ -64,6 +64,9 @@ public:
   /// \name Scalar TTI Implementations
   /// @{
 
+  int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+                            Type *Ty);
+
   using BaseT::getIntImmCost;
   int getIntImmCost(const APInt &Imm, Type *Ty);
 

Modified: llvm/trunk/lib/Transforms/Scalar/ConstantHoisting.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ConstantHoisting.cpp?rev=275382&r1=275381&r2=275382&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/ConstantHoisting.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/ConstantHoisting.cpp Thu Jul 14 02:44:20 2016
@@ -285,18 +285,109 @@ void ConstantHoistingPass::collectConsta
       collectConstantCandidates(ConstCandMap, &Inst);
 }
 
-/// \brief Find the base constant within the given range and rebase all other
-/// constants with respect to the base constant.
-void ConstantHoistingPass::findAndMakeBaseConstant(
-    ConstCandVecType::iterator S, ConstCandVecType::iterator E) {
-  auto MaxCostItr = S;
+// This helper function is necessary to deal with values that have different
+// bit widths (APInt Operator- does not like that). If the value cannot be
+// represented in uint64 we return an "empty" APInt. This is then interpreted
+// as the value is not in range.
+static llvm::Optional<APInt> calculateOffsetDiff(APInt V1, APInt V2)
+{
+  llvm::Optional<APInt> Res = None;
+  unsigned BW = V1.getBitWidth() > V2.getBitWidth() ?
+                V1.getBitWidth() : V2.getBitWidth();
+  uint64_t LimVal1 = V1.getLimitedValue();
+  uint64_t LimVal2 = V2.getLimitedValue();
+
+  if (LimVal1 == ~0ULL || LimVal2 == ~0ULL)
+    return Res;
+
+  uint64_t Diff = LimVal1 - LimVal2;
+  return APInt(BW, Diff, true);
+}
+
+// From a list of constants, one needs to picked as the base and the other
+// constants will be transformed into an offset from that base constant. The
+// question is which we can pick best? For example, consider these constants
+// and their number of uses:
+//
+//  Constants| 2 | 4 | 12 | 42 |
+//  NumUses  | 3 | 2 |  8 |  7 |
+//
+// Selecting constant 12 because it has the most uses will generate negative
+// offsets for constants 2 and 4 (i.e. -10 and -8 respectively). If negative
+// offsets lead to less optimal code generation, then there might be better
+// solutions. Suppose immediates in the range of 0..35 are most optimally
+// supported by the architecture, then selecting constant 2 is most optimal
+// because this will generate offsets: 0, 2, 10, 40. Offsets 0, 2 and 10 are in
+// range 0..35, and thus 3 + 2 + 8 = 13 uses are in range. Selecting 12 would
+// have only 8 uses in range, so choosing 2 as a base is more optimal. Thus, in
+// selecting the base constant the range of the offsets is a very important
+// factor too that we take into account here. This algorithm calculates a total
+// costs for selecting a constant as the base and substract the costs if
+// immediates are out of range. It has quadratic complexity, so we call this
+// function only when we're optimising for size and there are less than 100
+// constants, we fall back to the straightforward algorithm otherwise
+// which does not do all the offset calculations.
+unsigned
+ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S,
+                                           ConstCandVecType::iterator E,
+                                           ConstCandVecType::iterator &MaxCostItr) {
   unsigned NumUses = 0;
-  // Use the constant that has the maximum cost as base constant.
+
+  if(!Entry->getParent()->optForSize() || std::distance(S,E) > 100) {
+    for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
+      NumUses += ConstCand->Uses.size();
+      if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost)
+        MaxCostItr = ConstCand;
+    }
+    return NumUses;
+  }
+
+  DEBUG(dbgs() << "== Maximize constants in range ==\n");
+  int MaxCost = -1;
   for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
+    auto Value = ConstCand->ConstInt->getValue();
+    Type *Ty = ConstCand->ConstInt->getType();
+    int Cost = 0;
     NumUses += ConstCand->Uses.size();
-    if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost)
+    DEBUG(dbgs() << "= Constant: " << ConstCand->ConstInt->getValue() << "\n");
+
+    for (auto User : ConstCand->Uses) {
+      unsigned Opcode = User.Inst->getOpcode();
+      unsigned OpndIdx = User.OpndIdx;
+      Cost += TTI->getIntImmCost(Opcode, OpndIdx, Value, Ty);
+      DEBUG(dbgs() << "Cost: " << Cost << "\n");
+
+      for (auto C2 = S; C2 != E; ++C2) {
+        llvm::Optional<APInt> Diff = calculateOffsetDiff(
+                                      C2->ConstInt->getValue(),
+                                      ConstCand->ConstInt->getValue());
+        if (Diff) {
+          const int ImmCosts =
+            TTI->getIntImmCodeSizeCost(Opcode, OpndIdx, Diff.getValue(), Ty);
+          Cost -= ImmCosts;
+          DEBUG(dbgs() << "Offset " << Diff.getValue() << " "
+                       << "has penalty: " << ImmCosts << "\n"
+                       << "Adjusted cost: " << Cost << "\n");
+        }
+      }
+    }
+    DEBUG(dbgs() << "Cumulative cost: " << Cost << "\n");
+    if (Cost > MaxCost) {
+      MaxCost = Cost;
       MaxCostItr = ConstCand;
+      DEBUG(dbgs() << "New candidate: " << MaxCostItr->ConstInt->getValue()
+                   << "\n");
+    }
   }
+  return NumUses;
+}
+
+/// \brief Find the base constant within the given range and rebase all other
+/// constants with respect to the base constant.
+void ConstantHoistingPass::findAndMakeBaseConstant(
+    ConstCandVecType::iterator S, ConstCandVecType::iterator E) {
+  auto MaxCostItr = S;
+  unsigned NumUses = maximizeConstantsInRange(S, E, MaxCostItr);
 
   // Don't hoist constants that have only one use.
   if (NumUses <= 1)

Added: llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll?rev=275382&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll (added)
+++ llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll Thu Jul 14 02:44:20 2016
@@ -0,0 +1,42 @@
+; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -S < %s | FileCheck %s
+
+; There are different candidates here for the base constant: 1073876992 and
+; 1073876996. But we don't want to see the latter because it results in
+; negative offsets.
+
+define void @foo() #0 {
+entry:
+; CHECK-LABEL: @foo
+; CHECK-NOT: [[CONST1:%const_mat[0-9]*]] = add i32 %const, -4
+  %0 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %or = or i32 %0, 1
+  store volatile i32 %or, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %1 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4
+  %and = and i32 %1, -117506048
+  store volatile i32 %and, i32* inttoptr (i32 1073876996 to i32*), align 4
+  %2 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %and1 = and i32 %2, -17367041
+  store volatile i32 %and1, i32* inttoptr (i32 1073876996 to i32*), align 4096
+  %3 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %and2 = and i32 %3, -262145
+  store volatile i32 %and2, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %4 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4
+  %and3 = and i32 %4, -8323073
+  store volatile i32 %and3, i32* inttoptr (i32 1073876996 to i32*), align 4
+  store volatile i32 10420224, i32* inttoptr (i32 1073877000 to i32*), align 8
+  %5 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4096
+  %or4 = or i32 %5, 65536
+  store volatile i32 %or4, i32* inttoptr (i32 1073876996 to i32*), align 4096
+  %6 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %or6.i.i = or i32 %6, 16
+  store volatile i32 %or6.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %7 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %and7.i.i = and i32 %7, -4
+  store volatile i32 %and7.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %8 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %or8.i.i = or i32 %8, 2
+  store volatile i32 %or8.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  ret void
+}
+
+attributes #0 = { minsize norecurse nounwind optsize readnone uwtable }