[llvm] f85c507 - Pipe potentially invalid InstructionCost through CodeMetrics
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 9 15:17:33 PDT 2022
Author: Philip Reames
Date: 2022-06-09T15:17:24-07:00
New Revision: f85c5079b8d093ed9867733fac2946e3a50ed039
URL: https://github.com/llvm/llvm-project/commit/f85c5079b8d093ed9867733fac2946e3a50ed039
DIFF: https://github.com/llvm/llvm-project/commit/f85c5079b8d093ed9867733fac2946e3a50ed039.diff
LOG: Pipe potentially invalid InstructionCost through CodeMetrics
Per the documentation in Support/InstructionCost.h, the purpose of an invalid cost is so that clients can change behavior on impossible to cost inputs. CodeMetrics was instead asserting that invalid costs never occurred.
On a target with an incomplete cost model - e.g. RISCV - this means that transformations would crash on (falsely) invalid constructs - e.g. scalable vectors. While we certainly should improve the cost model - and I plan to do so in the near future - we also shouldn't be crashing. This violates the explicitly stated purpose of an invalid InstructionCost.
I updated all of the "easy" consumers where bailouts were locally obvious. I plan to follow up with loop unroll in a following change.
Differential Revision: https://reviews.llvm.org/D127131
Added:
Modified:
llvm/include/llvm/Analysis/CodeMetrics.h
llvm/lib/Analysis/CodeMetrics.cpp
llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
llvm/test/Transforms/LoopRotate/RISCV/invalid-cost.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/CodeMetrics.h b/llvm/include/llvm/Analysis/CodeMetrics.h
index 615591aa83adf..a9431bca11251 100644
--- a/llvm/include/llvm/Analysis/CodeMetrics.h
+++ b/llvm/include/llvm/Analysis/CodeMetrics.h
@@ -15,6 +15,7 @@
#define LLVM_ANALYSIS_CODEMETRICS_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/InstructionCost.h"
namespace llvm {
class AssumptionCache;
@@ -47,14 +48,14 @@ struct CodeMetrics {
/// True if this function calls alloca (in the C sense).
bool usesDynamicAlloca = false;
- /// Number of instructions in the analyzed blocks.
- unsigned NumInsts = false;
+ /// Code size cost of the analyzed blocks.
+ InstructionCost NumInsts = 0;
/// Number of analyzed blocks.
unsigned NumBlocks = false;
/// Keeps track of basic block code size estimates.
- DenseMap<const BasicBlock *, unsigned> NumBBInsts;
+ DenseMap<const BasicBlock *, InstructionCost> NumBBInsts;
/// Keep track of the number of calls to 'big' functions.
unsigned NumCalls = false;
diff --git a/llvm/lib/Analysis/CodeMetrics.cpp b/llvm/lib/Analysis/CodeMetrics.cpp
index 97de4973ec515..6d9084215dee0 100644
--- a/llvm/lib/Analysis/CodeMetrics.cpp
+++ b/llvm/lib/Analysis/CodeMetrics.cpp
@@ -117,13 +117,6 @@ void CodeMetrics::analyzeBasicBlock(
const BasicBlock *BB, const TargetTransformInfo &TTI,
const SmallPtrSetImpl<const Value *> &EphValues, bool PrepareForLTO) {
++NumBlocks;
- // Use a proxy variable for NumInsts of type InstructionCost, so that it can
- // use InstructionCost's arithmetic properties such as saturation when this
- // feature is added to InstructionCost.
- // When storing the value back to NumInsts, we can assume all costs are Valid
- // because the IR should not contain any nodes that cannot be costed. If that
- // happens the cost-model is broken.
- InstructionCost NumInstsProxy = NumInsts;
InstructionCost NumInstsBeforeThisBB = NumInsts;
for (const Instruction &I : *BB) {
// Skip ephemeral values.
@@ -183,8 +176,7 @@ void CodeMetrics::analyzeBasicBlock(
if (InvI->cannotDuplicate())
notDuplicatable = true;
- NumInstsProxy += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize);
- NumInsts = *NumInstsProxy.getValue();
+ NumInsts += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize);
}
if (isa<ReturnInst>(BB->getTerminator()))
@@ -204,6 +196,6 @@ void CodeMetrics::analyzeBasicBlock(
notDuplicatable |= isa<IndirectBrInst>(BB->getTerminator());
// Remember NumInsts for this BB.
- InstructionCost NumInstsThisBB = NumInstsProxy - NumInstsBeforeThisBB;
- NumBBInsts[BB] = *NumInstsThisBB.getValue();
+ InstructionCost NumInstsThisBB = NumInsts - NumInstsBeforeThisBB;
+ NumBBInsts[BB] = NumInstsThisBB;
}
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index ca10ca99b1531..a96f8f4712784 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -550,9 +550,9 @@ class FunctionSpecializer {
// shouldn't specialize it. Set the specialization cost to Invalid.
// Or if the lines of codes implies that this function is easy to get
// inlined so that we shouldn't specialize it.
- if (Metrics.notDuplicatable ||
+ if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
(!ForceFunctionSpecialization &&
- Metrics.NumInsts < SmallFunctionThreshold)) {
+ *Metrics.NumInsts.getValue() < SmallFunctionThreshold)) {
InstructionCost C{};
C.setInvalid();
return C;
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index 1ae857669b1c8..49bad10b8f30b 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -828,6 +828,16 @@ struct TransformDFA {
});
return false;
}
+
+ if (!Metrics.NumInsts.isValid()) {
+ LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
+ << "instructions with invalid cost.\n");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch)
+ << "Contains instructions with invalid cost.";
+ });
+ return false;
+ }
}
unsigned DuplicationCost = 0;
@@ -841,7 +851,7 @@ struct TransformDFA {
// using binary search, hence the LogBase2().
unsigned CondBranches =
APInt(32, Switch->getNumSuccessors()).ceilLogBase2();
- DuplicationCost = Metrics.NumInsts / CondBranches;
+ DuplicationCost = *Metrics.NumInsts.getValue() / CondBranches;
} else {
// Compared with jump tables, the DFA optimizer removes an indirect branch
// on each loop iteration, thus making branch prediction more precise. The
@@ -849,7 +859,7 @@ struct TransformDFA {
// predictor to make a mistake, and the more benefit there is in the DFA
// optimizer. Thus, the more branch targets there are, the lower is the
// cost of the DFA opt.
- DuplicationCost = Metrics.NumInsts / JumpTableSize;
+ DuplicationCost = *Metrics.NumInsts.getValue() / JumpTableSize;
}
LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block "
diff --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index cdb3670121997..9590fbbb19945 100644
--- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -299,7 +299,11 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
}
Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
}
- unsigned LoopSize = Metrics.NumInsts;
+
+ if (!Metrics.NumInsts.isValid())
+ return MadeChange;
+
+ unsigned LoopSize = *Metrics.NumInsts.getValue();
if (!LoopSize)
LoopSize = 1;
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index cf5358c9e3a8e..bd7189c6fcd7c 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -674,7 +674,9 @@ unsigned llvm::ApproximateLoopSize(
NotDuplicatable = Metrics.notDuplicatable;
Convergent = Metrics.convergent;
- unsigned LoopSize = Metrics.NumInsts;
+ // FIXME: This will crash for invalid InstructionCost, we should update the
+ // callers to gracefully bailout in this case.
+ unsigned LoopSize = *Metrics.NumInsts.getValue();
// Don't allow an estimate of size zero. This would allows unrolling of loops
// with huge iteration counts, which is a compile time problem even if it's
diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index 6dd5f6b54081a..0f33559c7e70d 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -310,7 +310,13 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
L->dump());
return Rotated;
}
- if (Metrics.NumInsts > MaxHeaderSize) {
+ if (!Metrics.NumInsts.isValid()) {
+ LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains instructions"
+ " with invalid cost: ";
+ L->dump());
+ return Rotated;
+ }
+ if (*Metrics.NumInsts.getValue() > MaxHeaderSize) {
LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains "
<< Metrics.NumInsts
<< " instructions, which is more than the threshold ("
diff --git a/llvm/test/Transforms/LoopRotate/RISCV/invalid-cost.ll b/llvm/test/Transforms/LoopRotate/RISCV/invalid-cost.ll
index cfcfebf8f5dcc..bda853333bfc7 100644
--- a/llvm/test/Transforms/LoopRotate/RISCV/invalid-cost.ll
+++ b/llvm/test/Transforms/LoopRotate/RISCV/invalid-cost.ll
@@ -76,3 +76,46 @@ for.end: ; preds = %for.cond
ret void
}
+; This demonstrates a case where a) loop rotate needs a cost estimate to
+; know if rotation is profitable, and b) there is no cost estimate available
+; due to invalid costs in the loop. We can't rotate this loop.
+define void @invalid_dup_required(<vscale x 1 x i8>* %p) nounwind ssp {
+; CHECK-LABEL: @invalid_dup_required(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_COND:%.*]]
+; CHECK: for.cond:
+; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; CHECK-NEXT: [[A:%.*]] = load <vscale x 1 x i8>, <vscale x 1 x i8>* [[P:%.*]], align 1
+; CHECK-NEXT: [[B:%.*]] = add <vscale x 1 x i8> [[A]], [[A]]
+; CHECK-NEXT: store <vscale x 1 x i8> [[B]], <vscale x 1 x i8>* [[P]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT: br label [[FOR_COND]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %a = load <vscale x 1 x i8>, <vscale x 1 x i8>* %p
+ %b = add <vscale x 1 x i8> %a, %a
+ store <vscale x 1 x i8> %b, <vscale x 1 x i8>* %p
+ %cmp = icmp slt i32 %i.0, 100
+ br i1 %cmp, label %for.body, label %for.end
+
+
+for.body: ; preds = %for.cond
+ call void @f()
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+declare void @f()
More information about the llvm-commits
mailing list