[llvm] 206f10d - Plumb InstructionCost through unroll costing

Thu Jun 9 15:43:04 PDT 2022

Author: Philip Reames
Date: 2022-06-09T15:42:53-07:00
New Revision: 206f10d3f6aa3fe4bf963675fe579e7554bad330

URL: https://github.com/llvm/llvm-project/commit/206f10d3f6aa3fe4bf963675fe579e7554bad330
DIFF: https://github.com/llvm/llvm-project/commit/206f10d3f6aa3fe4bf963675fe579e7554bad330.diff

LOG: Plumb InstructionCost through unroll costing

Teach the unroller(s) how to handle an invalid cost. This avoids crashes when the backend can't provide a cost due to either a fundemental limitation or an unimplemented cost model case.

Differential Revision: https://reviews.llvm.org/D127305

Added: 
    llvm/test/Transforms/LoopUnroll/RISCV/invalid-cost.ll

Modified: 
    llvm/include/llvm/Transforms/Utils/UnrollLoop.h
    llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
    llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
    llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index 320c36b36924..65fe8eff6442 100644

--- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -17,6 +17,7 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/InstructionCost.h"
 
 namespace llvm {
 
@@ -123,11 +124,9 @@ TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
     Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
     Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount);
 
-unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
-                             bool &NotDuplicatable, bool &Convergent,
-                             const TargetTransformInfo &TTI,
-                             const SmallPtrSetImpl<const Value *> &EphValues,
-                             unsigned BEInsns);
+InstructionCost ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
+    bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI,
+    const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns);
 
 } // end namespace llvm
 

diff  --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index a4aa69ad2a04..b00246760ea6 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -3013,16 +3013,17 @@ static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) {
   unsigned NumInlineCandidates;
   bool NotDuplicatable;
   bool Convergent;
-  unsigned LoopSize =
+  InstructionCost LoopSizeIC =
       ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
                           TTI, EphValues, UP.BEInsns);
-  LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSize << "\n");
+  LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSizeIC << "\n");
 
   // Loop is not unrollable if the loop contains certain instructions.
-  if (NotDuplicatable || Convergent) {
+  if (NotDuplicatable || Convergent || !LoopSizeIC.isValid()) {
     LLVM_DEBUG(dbgs() << "Loop not considered unrollable\n");
     return 1;
   }
+  unsigned LoopSize = *LoopSizeIC.getValue();
 
   // TODO: Determine trip count of \p CLI if constant, computeUnrollCount might
   // be able to use it.

diff  --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 5361259a8ef8..a33e5cca09b9 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -330,14 +330,23 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
   SmallPtrSet<const Value *, 32> EphValues;
   CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
   Loop *SubLoop = L->getSubLoops()[0];
-  unsigned InnerLoopSize =
+  InstructionCost InnerLoopSizeIC =
       ApproximateLoopSize(SubLoop, NumInlineCandidates, NotDuplicatable,
                           Convergent, TTI, EphValues, UP.BEInsns);
-  unsigned OuterLoopSize =
+  InstructionCost OuterLoopSizeIC =
       ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
                           TTI, EphValues, UP.BEInsns);
-  LLVM_DEBUG(dbgs() << "  Outer Loop Size: " << OuterLoopSize << "\n");
-  LLVM_DEBUG(dbgs() << "  Inner Loop Size: " << InnerLoopSize << "\n");
+  LLVM_DEBUG(dbgs() << "  Outer Loop Size: " << OuterLoopSizeIC << "\n");
+  LLVM_DEBUG(dbgs() << "  Inner Loop Size: " << InnerLoopSizeIC << "\n");
+
+  if (!InnerLoopSizeIC.isValid() || !OuterLoopSizeIC.isValid()) {
+    LLVM_DEBUG(dbgs() << "  Not unrolling loop which contains instructions"
+                      << " with invalid cost.\n");
+    return LoopUnrollResult::Unmodified;
+  }
+  unsigned InnerLoopSize = *InnerLoopSizeIC.getValue();
+  unsigned OuterLoopSize = *OuterLoopSizeIC.getValue();
+
   if (NotDuplicatable) {
     LLVM_DEBUG(dbgs() << "  Not unrolling loop which contains non-duplicatable "
                          "instructions.\n");

diff  --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index bd7189c6fcd7..196951335300 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -663,7 +663,7 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
 }
 
 /// ApproximateLoopSize - Approximate the size of the loop.
-unsigned llvm::ApproximateLoopSize(
+InstructionCost llvm::ApproximateLoopSize(
     const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent,
     const TargetTransformInfo &TTI,
     const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
@@ -674,9 +674,7 @@ unsigned llvm::ApproximateLoopSize(
   NotDuplicatable = Metrics.notDuplicatable;
   Convergent = Metrics.convergent;
 
-  // FIXME: This will crash for invalid InstructionCost, we should update the
-  // callers to gracefully bailout in this case.
-  unsigned LoopSize = *Metrics.NumInsts.getValue();
+  InstructionCost LoopSize = Metrics.NumInsts;
 
   // Don't allow an estimate of size zero.  This would allows unrolling of loops
   // with huge iteration counts, which is a compile time problem even if it's
@@ -684,7 +682,9 @@ unsigned llvm::ApproximateLoopSize(
   // that each loop has at least three instructions (likely a conditional
   // branch, a comparison feeding that branch, and some kind of loop increment
   // feeding that comparison instruction).
-  LoopSize = std::max(LoopSize, BEInsns + 1);
+  if (LoopSize.isValid() && *LoopSize.getValue() < BEInsns + 1)
+    // This is an open coded max() on InstructionCost
+    LoopSize = BEInsns + 1;
 
   return LoopSize;
 }
@@ -1191,10 +1191,18 @@ static LoopUnrollResult tryToUnrollLoop(
   SmallPtrSet<const Value *, 32> EphValues;
   CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
 
-  unsigned LoopSize =
+  InstructionCost LoopSizeIC =
       ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
                           TTI, EphValues, UP.BEInsns);
-  LLVM_DEBUG(dbgs() << "  Loop Size = " << LoopSize << "\n");
+  LLVM_DEBUG(dbgs() << "  Loop Size = " << LoopSizeIC << "\n");
+
+  if (!LoopSizeIC.isValid()) {
+    LLVM_DEBUG(dbgs() << "  Not unrolling loop which contains instructions"
+                      << " with invalid cost.\n");
+    return LoopUnrollResult::Unmodified;
+  }
+  unsigned LoopSize = *LoopSizeIC.getValue();
+
   if (NotDuplicatable) {
     LLVM_DEBUG(dbgs() << "  Not unrolling loop which contains non-duplicatable"
                       << " instructions.\n");

diff  --git a/llvm/test/Transforms/LoopUnroll/RISCV/invalid-cost.ll b/llvm/test/Transforms/LoopUnroll/RISCV/invalid-cost.ll
new file mode 100644
index 000000000000..53bf8f100b00
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/RISCV/invalid-cost.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -S -mtriple=riscv64 -loop-unroll | FileCheck %s
+
+; Demonstrate handling of invalid costs in LoopUnroll.  This test uses
+; scalable vectors on RISCV w/o +V to create a situation where a construct
+; can not be lowered, and is thus invalid regardless of what the target
+; does or does not implement in terms of a cost model.
+
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
+target triple = "riscv64-unknown-unknown"
+
+define void @invalid(<vscale x 1 x i8>* %p) nounwind ssp {
+; CHECK-LABEL: @invalid(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[A:%.*]] = load <vscale x 1 x i8>, <vscale x 1 x i8>* [[P:%.*]], align 1
+; CHECK-NEXT:    [[B:%.*]] = add <vscale x 1 x i8> [[A]], [[A]]
+; CHECK-NEXT:    store <vscale x 1 x i8> [[B]], <vscale x 1 x i8>* [[P]], align 1
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 10
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %a = load <vscale x 1 x i8>, <vscale x 1 x i8>* %p
+  %b = add <vscale x 1 x i8> %a, %a
+  store <vscale x 1 x i8> %b, <vscale x 1 x i8>* %p
+  %inc = add nsw i32 %i.0, 1
+  %cmp = icmp slt i32 %i.0, 10
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+
+declare void @f()