[llvm] f85c507 - Pipe potentially invalid InstructionCost through CodeMetrics

Thu Jun 9 15:17:33 PDT 2022

Author: Philip Reames
Date: 2022-06-09T15:17:24-07:00
New Revision: f85c5079b8d093ed9867733fac2946e3a50ed039

URL: https://github.com/llvm/llvm-project/commit/f85c5079b8d093ed9867733fac2946e3a50ed039
DIFF: https://github.com/llvm/llvm-project/commit/f85c5079b8d093ed9867733fac2946e3a50ed039.diff

LOG: Pipe potentially invalid InstructionCost through CodeMetrics

Per the documentation in Support/InstructionCost.h, the purpose of an invalid cost is so that clients can change behavior on impossible to cost inputs. CodeMetrics was instead asserting that invalid costs never occurred.

On a target with an incomplete cost model - e.g. RISCV - this means that transformations would crash on (falsely) invalid constructs - e.g. scalable vectors. While we certainly should improve the cost model - and I plan to do so in the near future - we also shouldn't be crashing. This violates the explicitly stated purpose of an invalid InstructionCost.

I updated all of the "easy" consumers where bailouts were locally obvious. I plan to follow up with loop unroll in a following change.

Differential Revision: https://reviews.llvm.org/D127131

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/CodeMetrics.h
    llvm/lib/Analysis/CodeMetrics.cpp
    llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
    llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
    llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
    llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
    llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
    llvm/test/Transforms/LoopRotate/RISCV/invalid-cost.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/CodeMetrics.h b/llvm/include/llvm/Analysis/CodeMetrics.h
index 615591aa83adf..a9431bca11251 100644

--- a/llvm/include/llvm/Analysis/CodeMetrics.h
+++ b/llvm/include/llvm/Analysis/CodeMetrics.h
@@ -15,6 +15,7 @@
 #define LLVM_ANALYSIS_CODEMETRICS_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/InstructionCost.h"
 
 namespace llvm {
 class AssumptionCache;
@@ -47,14 +48,14 @@ struct CodeMetrics {
   /// True if this function calls alloca (in the C sense).
   bool usesDynamicAlloca = false;
 
-  /// Number of instructions in the analyzed blocks.
-  unsigned NumInsts = false;
+  /// Code size cost of the analyzed blocks.
+  InstructionCost NumInsts = 0;
 
   /// Number of analyzed blocks.
   unsigned NumBlocks = false;
 
   /// Keeps track of basic block code size estimates.
-  DenseMap<const BasicBlock *, unsigned> NumBBInsts;
+  DenseMap<const BasicBlock *, InstructionCost> NumBBInsts;
 
   /// Keep track of the number of calls to 'big' functions.
   unsigned NumCalls = false;

diff  --git a/llvm/lib/Analysis/CodeMetrics.cpp b/llvm/lib/Analysis/CodeMetrics.cpp
index 97de4973ec515..6d9084215dee0 100644
--- a/llvm/lib/Analysis/CodeMetrics.cpp
+++ b/llvm/lib/Analysis/CodeMetrics.cpp
@@ -117,13 +117,6 @@ void CodeMetrics::analyzeBasicBlock(
     const BasicBlock *BB, const TargetTransformInfo &TTI,
     const SmallPtrSetImpl<const Value *> &EphValues, bool PrepareForLTO) {
   ++NumBlocks;
-  // Use a proxy variable for NumInsts of type InstructionCost, so that it can
-  // use InstructionCost's arithmetic properties such as saturation when this
-  // feature is added to InstructionCost.
-  // When storing the value back to NumInsts, we can assume all costs are Valid
-  // because the IR should not contain any nodes that cannot be costed. If that
-  // happens the cost-model is broken.
-  InstructionCost NumInstsProxy = NumInsts;
   InstructionCost NumInstsBeforeThisBB = NumInsts;
   for (const Instruction &I : *BB) {
     // Skip ephemeral values.
@@ -183,8 +176,7 @@ void CodeMetrics::analyzeBasicBlock(
       if (InvI->cannotDuplicate())
         notDuplicatable = true;
 
-    NumInstsProxy += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize);
-    NumInsts = *NumInstsProxy.getValue();
+    NumInsts += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize);
   }
 
   if (isa<ReturnInst>(BB->getTerminator()))
@@ -204,6 +196,6 @@ void CodeMetrics::analyzeBasicBlock(
   notDuplicatable |= isa<IndirectBrInst>(BB->getTerminator());
 
   // Remember NumInsts for this BB.
-  InstructionCost NumInstsThisBB = NumInstsProxy - NumInstsBeforeThisBB;
-  NumBBInsts[BB] = *NumInstsThisBB.getValue();
+  InstructionCost NumInstsThisBB = NumInsts - NumInstsBeforeThisBB;
+  NumBBInsts[BB] = NumInstsThisBB;
 }

diff  --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index ca10ca99b1531..a96f8f4712784 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -550,9 +550,9 @@ class FunctionSpecializer {
     // shouldn't specialize it. Set the specialization cost to Invalid.
     // Or if the lines of codes implies that this function is easy to get
     // inlined so that we shouldn't specialize it.
-    if (Metrics.notDuplicatable ||
+    if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
         (!ForceFunctionSpecialization &&
-         Metrics.NumInsts < SmallFunctionThreshold)) {
+         *Metrics.NumInsts.getValue() < SmallFunctionThreshold)) {
       InstructionCost C{};
       C.setInvalid();
       return C;

diff  --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index 1ae857669b1c8..49bad10b8f30b 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -828,6 +828,16 @@ struct TransformDFA {
         });
         return false;
       }
+
+      if (!Metrics.NumInsts.isValid()) {
+        LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
+                          << "instructions with invalid cost.\n");
+        ORE->emit([&]() {
+          return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch)
+                 << "Contains instructions with invalid cost.";
+        });
+        return false;
+      }
     }
 
     unsigned DuplicationCost = 0;
@@ -841,7 +851,7 @@ struct TransformDFA {
       // using binary search, hence the LogBase2().
       unsigned CondBranches =
           APInt(32, Switch->getNumSuccessors()).ceilLogBase2();
-      DuplicationCost = Metrics.NumInsts / CondBranches;
+      DuplicationCost = *Metrics.NumInsts.getValue() / CondBranches;
     } else {
       // Compared with jump tables, the DFA optimizer removes an indirect branch
       // on each loop iteration, thus making branch prediction more precise. The
@@ -849,7 +859,7 @@ struct TransformDFA {
       // predictor to make a mistake, and the more benefit there is in the DFA
       // optimizer. Thus, the more branch targets there are, the lower is the
       // cost of the DFA opt.
-      DuplicationCost = Metrics.NumInsts / JumpTableSize;
+      DuplicationCost = *Metrics.NumInsts.getValue() / JumpTableSize;
     }
 
     LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block "

diff  --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index cdb3670121997..9590fbbb19945 100644
--- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -299,7 +299,11 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
     }
     Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
   }
-  unsigned LoopSize = Metrics.NumInsts;
+
+  if (!Metrics.NumInsts.isValid())
+    return MadeChange;
+
+  unsigned LoopSize = *Metrics.NumInsts.getValue();
   if (!LoopSize)
     LoopSize = 1;
 

diff  --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index cf5358c9e3a8e..bd7189c6fcd7c 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -674,7 +674,9 @@ unsigned llvm::ApproximateLoopSize(
   NotDuplicatable = Metrics.notDuplicatable;
   Convergent = Metrics.convergent;
 
-  unsigned LoopSize = Metrics.NumInsts;
+  // FIXME: This will crash for invalid InstructionCost, we should update the
+  // callers to gracefully bailout in this case.
+  unsigned LoopSize = *Metrics.NumInsts.getValue();
 
   // Don't allow an estimate of size zero.  This would allows unrolling of loops
   // with huge iteration counts, which is a compile time problem even if it's

diff  --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index 6dd5f6b54081a..0f33559c7e70d 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -310,7 +310,13 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
                    L->dump());
         return Rotated;
       }
-      if (Metrics.NumInsts > MaxHeaderSize) {
+      if (!Metrics.NumInsts.isValid()) {
+        LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains instructions"
+                   " with invalid cost: ";
+                   L->dump());
+        return Rotated;
+      }
+      if (*Metrics.NumInsts.getValue() > MaxHeaderSize) {
         LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains "
                           << Metrics.NumInsts
                           << " instructions, which is more than the threshold ("

diff  --git a/llvm/test/Transforms/LoopRotate/RISCV/invalid-cost.ll b/llvm/test/Transforms/LoopRotate/RISCV/invalid-cost.ll
index cfcfebf8f5dcc..bda853333bfc7 100644
--- a/llvm/test/Transforms/LoopRotate/RISCV/invalid-cost.ll
+++ b/llvm/test/Transforms/LoopRotate/RISCV/invalid-cost.ll
@@ -76,3 +76,46 @@ for.end:                                          ; preds = %for.cond
   ret void
 }
 
+; This demonstrates a case where a) loop rotate needs a cost estimate to
+; know if rotation is profitable, and b) there is no cost estimate available
+; due to invalid costs in the loop.  We can't rotate this loop.
+define void @invalid_dup_required(<vscale x 1 x i8>* %p) nounwind ssp {
+; CHECK-LABEL: @invalid_dup_required(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; CHECK-NEXT:    [[A:%.*]] = load <vscale x 1 x i8>, <vscale x 1 x i8>* [[P:%.*]], align 1
+; CHECK-NEXT:    [[B:%.*]] = add <vscale x 1 x i8> [[A]], [[A]]
+; CHECK-NEXT:    store <vscale x 1 x i8> [[B]], <vscale x 1 x i8>* [[P]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    call void @f()
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT:    br label [[FOR_COND]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %a = load <vscale x 1 x i8>, <vscale x 1 x i8>* %p
+  %b = add <vscale x 1 x i8> %a, %a
+  store <vscale x 1 x i8> %b, <vscale x 1 x i8>* %p
+  %cmp = icmp slt i32 %i.0, 100
+  br i1 %cmp, label %for.body, label %for.end
+
+
+for.body:                                         ; preds = %for.cond
+  call void @f()
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+declare void @f()