[llvm] 206f10d - Plumb InstructionCost through unroll costing
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 9 15:43:04 PDT 2022
Author: Philip Reames
Date: 2022-06-09T15:42:53-07:00
New Revision: 206f10d3f6aa3fe4bf963675fe579e7554bad330
URL: https://github.com/llvm/llvm-project/commit/206f10d3f6aa3fe4bf963675fe579e7554bad330
DIFF: https://github.com/llvm/llvm-project/commit/206f10d3f6aa3fe4bf963675fe579e7554bad330.diff
LOG: Plumb InstructionCost through unroll costing
Teach the unroller(s) how to handle an invalid cost. This avoids crashes when the backend can't provide a cost due to either a fundamental limitation or an unimplemented cost model case.
Differential Revision: https://reviews.llvm.org/D127305
Added:
llvm/test/Transforms/LoopUnroll/RISCV/invalid-cost.ll
Modified:
llvm/include/llvm/Transforms/Utils/UnrollLoop.h
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index 320c36b36924..65fe8eff6442 100644
--- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/InstructionCost.h"
namespace llvm {
@@ -123,11 +124,9 @@ TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount);
-unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
- bool &NotDuplicatable, bool &Convergent,
- const TargetTransformInfo &TTI,
- const SmallPtrSetImpl<const Value *> &EphValues,
- unsigned BEInsns);
+InstructionCost ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
+ bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI,
+ const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns);
} // end namespace llvm
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index a4aa69ad2a04..b00246760ea6 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -3013,16 +3013,17 @@ static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) {
unsigned NumInlineCandidates;
bool NotDuplicatable;
bool Convergent;
- unsigned LoopSize =
+ InstructionCost LoopSizeIC =
ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
TTI, EphValues, UP.BEInsns);
- LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSize << "\n");
+ LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSizeIC << "\n");
// Loop is not unrollable if the loop contains certain instructions.
- if (NotDuplicatable || Convergent) {
+ if (NotDuplicatable || Convergent || !LoopSizeIC.isValid()) {
LLVM_DEBUG(dbgs() << "Loop not considered unrollable\n");
return 1;
}
+ unsigned LoopSize = *LoopSizeIC.getValue();
// TODO: Determine trip count of \p CLI if constant, computeUnrollCount might
// be able to use it.
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 5361259a8ef8..a33e5cca09b9 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -330,14 +330,23 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
SmallPtrSet<const Value *, 32> EphValues;
CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
Loop *SubLoop = L->getSubLoops()[0];
- unsigned InnerLoopSize =
+ InstructionCost InnerLoopSizeIC =
ApproximateLoopSize(SubLoop, NumInlineCandidates, NotDuplicatable,
Convergent, TTI, EphValues, UP.BEInsns);
- unsigned OuterLoopSize =
+ InstructionCost OuterLoopSizeIC =
ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
TTI, EphValues, UP.BEInsns);
- LLVM_DEBUG(dbgs() << " Outer Loop Size: " << OuterLoopSize << "\n");
- LLVM_DEBUG(dbgs() << " Inner Loop Size: " << InnerLoopSize << "\n");
+ LLVM_DEBUG(dbgs() << " Outer Loop Size: " << OuterLoopSizeIC << "\n");
+ LLVM_DEBUG(dbgs() << " Inner Loop Size: " << InnerLoopSizeIC << "\n");
+
+ if (!InnerLoopSizeIC.isValid() || !OuterLoopSizeIC.isValid()) {
+ LLVM_DEBUG(dbgs() << " Not unrolling loop which contains instructions"
+ << " with invalid cost.\n");
+ return LoopUnrollResult::Unmodified;
+ }
+ unsigned InnerLoopSize = *InnerLoopSizeIC.getValue();
+ unsigned OuterLoopSize = *OuterLoopSizeIC.getValue();
+
if (NotDuplicatable) {
LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable "
"instructions.\n");
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index bd7189c6fcd7..196951335300 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -663,7 +663,7 @@ static Optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
}
/// ApproximateLoopSize - Approximate the size of the loop.
-unsigned llvm::ApproximateLoopSize(
+InstructionCost llvm::ApproximateLoopSize(
const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent,
const TargetTransformInfo &TTI,
const SmallPtrSetImpl<const Value *> &EphValues, unsigned BEInsns) {
@@ -674,9 +674,7 @@ unsigned llvm::ApproximateLoopSize(
NotDuplicatable = Metrics.notDuplicatable;
Convergent = Metrics.convergent;
- // FIXME: This will crash for invalid InstructionCost, we should update the
- // callers to gracefully bailout in this case.
- unsigned LoopSize = *Metrics.NumInsts.getValue();
+ InstructionCost LoopSize = Metrics.NumInsts;
// Don't allow an estimate of size zero. This would allows unrolling of loops
// with huge iteration counts, which is a compile time problem even if it's
@@ -684,7 +682,9 @@ unsigned llvm::ApproximateLoopSize(
// that each loop has at least three instructions (likely a conditional
// branch, a comparison feeding that branch, and some kind of loop increment
// feeding that comparison instruction).
- LoopSize = std::max(LoopSize, BEInsns + 1);
+ if (LoopSize.isValid() && *LoopSize.getValue() < BEInsns + 1)
+ // This is an open coded max() on InstructionCost
+ LoopSize = BEInsns + 1;
return LoopSize;
}
@@ -1191,10 +1191,18 @@ static LoopUnrollResult tryToUnrollLoop(
SmallPtrSet<const Value *, 32> EphValues;
CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
- unsigned LoopSize =
+ InstructionCost LoopSizeIC =
ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
TTI, EphValues, UP.BEInsns);
- LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
+ LLVM_DEBUG(dbgs() << " Loop Size = " << LoopSizeIC << "\n");
+
+ if (!LoopSizeIC.isValid()) {
+ LLVM_DEBUG(dbgs() << " Not unrolling loop which contains instructions"
+ << " with invalid cost.\n");
+ return LoopUnrollResult::Unmodified;
+ }
+ unsigned LoopSize = *LoopSizeIC.getValue();
+
if (NotDuplicatable) {
LLVM_DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
<< " instructions.\n");
diff --git a/llvm/test/Transforms/LoopUnroll/RISCV/invalid-cost.ll b/llvm/test/Transforms/LoopUnroll/RISCV/invalid-cost.ll
new file mode 100644
index 000000000000..53bf8f100b00
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/RISCV/invalid-cost.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -S -mtriple=riscv64 -loop-unroll | FileCheck %s
+
+; Demonstrate handling of invalid costs in LoopUnroll. This test uses
+; scalable vectors on RISCV w/o +V to create a situation where a construct
+; can not be lowered, and is thus invalid regardless of what the target
+; does or does not implement in terms of a cost model.
+
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
+target triple = "riscv64-unknown-unknown"
+
+define void @invalid(<vscale x 1 x i8>* %p) nounwind ssp {
+; CHECK-LABEL: @invalid(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[A:%.*]] = load <vscale x 1 x i8>, <vscale x 1 x i8>* [[P:%.*]], align 1
+; CHECK-NEXT: [[B:%.*]] = add <vscale x 1 x i8> [[A]], [[A]]
+; CHECK-NEXT: store <vscale x 1 x i8> [[B]], <vscale x 1 x i8>* [[P]], align 1
+; CHECK-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 10
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %a = load <vscale x 1 x i8>, <vscale x 1 x i8>* %p
+ %b = add <vscale x 1 x i8> %a, %a
+ store <vscale x 1 x i8> %b, <vscale x 1 x i8>* %p
+ %inc = add nsw i32 %i.0, 1
+ %cmp = icmp slt i32 %i.0, 10
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+
+declare void @f()
More information about the llvm-commits mailing list