[llvm] d9085f6 - [globalisel] Add lost debug locations verifier

Daniel Sanders via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 16 10:43:59 PDT 2020


Author: Daniel Sanders
Date: 2020-04-16T10:43:35-07:00
New Revision: d9085f65db0b39fa53d31fc0533c77e91f2f4a1c

URL: https://github.com/llvm/llvm-project/commit/d9085f65db0b39fa53d31fc0533c77e91f2f4a1c
DIFF: https://github.com/llvm/llvm-project/commit/d9085f65db0b39fa53d31fc0533c77e91f2f4a1c.diff

LOG: [globalisel] Add lost debug locations verifier

Summary:
This verifier tries to ensure that DebugLoc's don't just disappear as
we transform the MIR. It observes the instructions created, erased, and
changed and at checkpoints chosen by the client algorithm verifies the
locations affected by those changes.

In particular, it verifies that:
* Every DebugLoc for an erased/changing instruction is still present on
  at least one new/changed instruction
* Failing that, that there is a line-0 location in the new/changed
  instructions. It's not possible to confirm which locations were merged so
  it conservatively assumes all unaccounted for locations are accounted
  for by any line-0 location to avoid false positives.
If that fails, it prints the lost locations in the debug output along with
the instructions that should have accounted for them.

In theory, this is usable by the legalizer, combiner, selector and any other
pass that performs incremental changes to the MIR. However, it has so far
only really been tested on the legalizer (not including the artifact
combiner) where it has caught lots of lost locations, particularly in Custom
legalizations. There's only one example here as my initial testing was on an
out-of-tree target and I haven't done a pass over the in-tree targets yet.

Depends on D77575, D77446

Reviewers: bogner, aprantl, vsk

Subscribers: jvesely, nhaehnle, mgorny, rovka, hiraditya, volkan, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77576

Added: 
    llvm/include/llvm/CodeGen/GlobalISel/LostDebugLocObserver.h
    llvm/lib/CodeGen/GlobalISel/LostDebugLocObserver.cpp

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h
    llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
    llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
    llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h b/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h
index 07173b9719bd..e59bf1b91262 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h
@@ -26,6 +26,7 @@
 namespace llvm {
 
 class MachineRegisterInfo;
+class LostDebugLocObserver;
 
 class Legalizer : public MachineFunctionPass {
 public:
@@ -71,6 +72,7 @@ class Legalizer : public MachineFunctionPass {
   static MFResult
   legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
                           ArrayRef<GISelChangeObserver *> AuxObservers,
+                          LostDebugLocObserver &LocObserver,
                           MachineIRBuilder &MIRBuilder);
 };
 } // End namespace llvm.

diff  --git a/llvm/include/llvm/CodeGen/GlobalISel/LostDebugLocObserver.h b/llvm/include/llvm/CodeGen/GlobalISel/LostDebugLocObserver.h
new file mode 100644
index 000000000000..cd2a871e9579
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LostDebugLocObserver.h
@@ -0,0 +1,50 @@
+//===----- llvm/CodeGen/GlobalISel/LostDebugLocObserver.h -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// Tracks DebugLocs between checkpoints and verifies that they are transferred.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CODEGEN_GLOBALISEL_LOSTDEBUGLOCOBSERVER_H
+#define LLVM_CODEGEN_GLOBALISEL_LOSTDEBUGLOCOBSERVER_H
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+
+namespace llvm {
+class LostDebugLocObserver : public GISelChangeObserver {
+  StringRef DebugType;
+  SmallSet<DebugLoc, 4> LostDebugLocs;
+  SmallPtrSet<MachineInstr *, 4> PotentialMIsForDebugLocs;
+  unsigned NumLostDebugLocs = 0;
+
+public:
+  LostDebugLocObserver(StringRef DebugType) : DebugType(DebugType) {}
+
+  unsigned getNumLostDebugLocs() const { return NumLostDebugLocs; }
+
+  /// Call this to indicate that it's a good point to assess whether locations
+  /// have been lost. Typically this will be when a logical change has been
+  /// completed such as the caller has finished replacing some instructions with
+  /// alternatives. When CheckDebugLocs is true, the locations will be checked
+  /// to see if any have been lost since the last checkpoint. When
+  /// CheckDebugLocs is false, it will just reset ready for the next checkpoint
+  /// without checking anything. This can be helpful to limit the detection to
+  /// easy-to-fix portions of an algorithm before allowing more 
diff icult ones.
+  void checkpoint(bool CheckDebugLocs = true);
+
+  void createdInstr(MachineInstr &MI) override;
+  void erasingInstr(MachineInstr &MI) override;
+  void changingInstr(MachineInstr &MI) override;
+  void changedInstr(MachineInstr &MI) override;
+
+private:
+  void analyzeDebugLocations();
+};
+
+} // namespace llvm
+#endif // ifndef LLVM_CODEGEN_GLOBALISEL_LOSTDEBUGLOCOBSERVER_H

diff  --git a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
index 5774991a43b9..ed6a45afb7aa 100644
--- a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -16,6 +16,7 @@ add_llvm_component_library(LLVMGlobalISel
         LegalizerHelper.cpp
         LegalizerInfo.cpp
         Localizer.cpp
+        LostDebugLocObserver.cpp
         MachineIRBuilder.cpp
         RegBankSelect.cpp
         RegisterBank.cpp

diff  --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
index 0c0edc8a7b0c..4b6d24ba73e8 100644
--- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -21,6 +21,7 @@
 #include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
 #include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h"
 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -42,6 +43,27 @@ static cl::opt<bool>
                          cl::desc("Should enable CSE in Legalizer"),
                          cl::Optional, cl::init(false));
 
+enum class DebugLocVerifyLevel {
+  None,
+  Legalizations,
+  LegalizationsAndArtifactCombiners,
+};
+#ifndef NDEBUG
+static cl::opt<DebugLocVerifyLevel> VerifyDebugLocs(
+    "verify-legalizer-debug-locs",
+    cl::desc("Verify that debug locations are handled"),
+    cl::values(clEnumVal(DebugLocVerifyLevel::None, "No verification"),
+               clEnumVal(DebugLocVerifyLevel::Legalizations,
+                         "Verify legalizations"),
+               clEnumVal(DebugLocVerifyLevel::LegalizationsAndArtifactCombiners,
+                         "Verify legalizations and artifact combines")),
+    cl::init(DebugLocVerifyLevel::Legalizations));
+#else
+// Always disable it for release builds by preventing the observer from being
+// installed.
+static const DebugLocVerifyLevel VerifyDebugLocs = DebugLocVerifyLevel::None;
+#endif
+
 char Legalizer::ID = 0;
 INITIALIZE_PASS_BEGIN(Legalizer, DEBUG_TYPE,
                       "Legalize the Machine IR a function's Machine IR", false,
@@ -144,6 +166,7 @@ class LegalizerWorkListManager : public GISelChangeObserver {
 Legalizer::MFResult
 Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
                                    ArrayRef<GISelChangeObserver *> AuxObservers,
+                                   LostDebugLocObserver &LocObserver,
                                    MachineIRBuilder &MIRBuilder) {
   MachineRegisterInfo &MRI = MF.getRegInfo();
 
@@ -200,6 +223,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
       if (isTriviallyDead(MI, MRI)) {
         LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n");
         MI.eraseFromParentAndMarkDBGValuesForRemoval();
+        LocObserver.checkpoint();
         continue;
       }
 
@@ -225,6 +249,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
         return {Changed, &MI};
       }
       WorkListObserver.printNewInstrs();
+      LocObserver.checkpoint();
       Changed |= Res == LegalizerHelper::Legalized;
     }
     // Try to combine the instructions in RetryList again if there
@@ -239,6 +264,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
         return {Changed, RetryList.front()};
       }
     }
+    LocObserver.checkpoint();
     while (!ArtifactList.empty()) {
       MachineInstr &MI = *ArtifactList.pop_back_val();
       assert(isPreISelGenericOpcode(MI.getOpcode()) &&
@@ -247,6 +273,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
         LLVM_DEBUG(dbgs() << MI << "Is dead\n");
         RemoveDeadInstFromLists(&MI);
         MI.eraseFromParentAndMarkDBGValuesForRemoval();
+        LocObserver.checkpoint();
         continue;
       }
       SmallVector<MachineInstr *, 4> DeadInstructions;
@@ -254,11 +281,15 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
       if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions,
                                             WrapperObserver)) {
         WorkListObserver.printNewInstrs();
+        LocObserver.checkpoint(
+            VerifyDebugLocs ==
+            DebugLocVerifyLevel::LegalizationsAndArtifactCombiners);
         for (auto *DeadMI : DeadInstructions) {
           LLVM_DEBUG(dbgs() << *DeadMI << "Is dead\n");
           RemoveDeadInstFromLists(DeadMI);
           DeadMI->eraseFromParentAndMarkDBGValuesForRemoval();
         }
+        LocObserver.checkpoint();
         Changed = true;
         continue;
       }
@@ -307,9 +338,13 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
     AuxObservers.push_back(CSEInfo);
   }
   assert(!CSEInfo || !errorToBool(CSEInfo->verify()));
+  LostDebugLocObserver LocObserver(DEBUG_TYPE);
+  if (VerifyDebugLocs > DebugLocVerifyLevel::None)
+    AuxObservers.push_back(&LocObserver);
 
   const LegalizerInfo &LI = *MF.getSubtarget().getLegalizerInfo();
-  MFResult Result = legalizeMachineFunction(MF, LI, AuxObservers, *MIRBuilder);
+  MFResult Result =
+      legalizeMachineFunction(MF, LI, AuxObservers, LocObserver, *MIRBuilder);
 
   if (Result.FailedOn) {
     reportGISelFailure(MF, TPC, MORE, "gisel-legalize",
@@ -326,6 +361,28 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
     reportGISelFailure(MF, TPC, MORE, R);
     return false;
   }
+
+  if (LocObserver.getNumLostDebugLocs()) {
+    MachineOptimizationRemarkMissed R("gisel-legalize", "LostDebugLoc",
+                                      MF.getFunction().getSubprogram(),
+                                      /*MBB=*/&*MF.begin());
+    R << "lost "
+      << ore::NV("NumLostDebugLocs", LocObserver.getNumLostDebugLocs())
+      << " debug locations during pass";
+    reportGISelWarning(MF, TPC, MORE, R);
+    // Example remark:
+    // --- !Missed
+    // Pass:            gisel-legalize
+    // Name:            GISelFailure
+    // DebugLoc:        { File: '.../legalize-urem.mir', Line: 1, Column: 0 }
+    // Function:        test_urem_s32
+    // Args:
+    //   - String:          'lost '
+    //   - NumLostDebugLocs: '1'
+    //   - String:          ' debug locations during pass'
+    // ...
+  }
+
   // If for some reason CSE was not enabled, make sure that we invalidate the
   // CSEInfo object (as we currently declare that the analysis is preserved).
   // The next time get on the wrapper is called, it will force it to recompute

diff  --git a/llvm/lib/CodeGen/GlobalISel/LostDebugLocObserver.cpp b/llvm/lib/CodeGen/GlobalISel/LostDebugLocObserver.cpp
new file mode 100644
index 000000000000..29f56b167c66
--- /dev/null
+++ b/llvm/lib/CodeGen/GlobalISel/LostDebugLocObserver.cpp
@@ -0,0 +1,113 @@
+//===----- llvm/CodeGen/GlobalISel/LostDebugLocObserver.cpp -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// Tracks DebugLocs between checkpoints and verifies that they are transferred.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
+
+using namespace llvm;
+
+#define LOC_DEBUG(X) DEBUG_WITH_TYPE(DebugType.str().c_str(), X)
+
+void LostDebugLocObserver::analyzeDebugLocations() {
+  if (LostDebugLocs.empty()) {
+    LOC_DEBUG(dbgs() << ".. No debug info was present\n");
+    return;
+  }
+  if (PotentialMIsForDebugLocs.empty()) {
+    LOC_DEBUG(
+        dbgs() << ".. No instructions to carry debug info (dead code?)\n");
+    return;
+  }
+
+  LOC_DEBUG(dbgs() << ".. Searching " << PotentialMIsForDebugLocs.size()
+                   << " instrs for " << LostDebugLocs.size() << " locations\n");
+  SmallPtrSet<MachineInstr *, 4> FoundIn;
+  for (MachineInstr *MI : PotentialMIsForDebugLocs) {
+    if (!MI->getDebugLoc())
+      continue;
+    // Check this first in case there's a matching line-0 location on both input
+    // and output.
+    if (MI->getDebugLoc().getLine() == 0) {
+      LOC_DEBUG(
+          dbgs() << ".. Assuming line-0 location covers remainder (if any)\n");
+      return;
+    }
+    if (LostDebugLocs.erase(MI->getDebugLoc())) {
+      LOC_DEBUG(dbgs() << ".. .. found " << MI->getDebugLoc() << " in " << *MI);
+      FoundIn.insert(MI);
+      continue;
+    }
+  }
+  if (LostDebugLocs.empty())
+    return;
+
+  NumLostDebugLocs += LostDebugLocs.size();
+  LOC_DEBUG({
+    dbgs() << ".. Lost locations:\n";
+    for (const DebugLoc &Loc : LostDebugLocs) {
+      dbgs() << ".. .. ";
+      Loc.print(dbgs());
+      dbgs() << "\n";
+    }
+    dbgs() << ".. MIs with matched locations:\n";
+    for (MachineInstr *MI : FoundIn)
+      if (PotentialMIsForDebugLocs.erase(MI))
+        dbgs() << ".. .. " << *MI;
+    dbgs() << ".. Remaining MIs with unmatched/no locations:\n";
+    for (const MachineInstr *MI : PotentialMIsForDebugLocs)
+      dbgs() << ".. .. " << *MI;
+  });
+}
+
+void LostDebugLocObserver::checkpoint(bool CheckDebugLocs) {
+  if (CheckDebugLocs)
+    analyzeDebugLocations();
+  PotentialMIsForDebugLocs.clear();
+  LostDebugLocs.clear();
+}
+
+void LostDebugLocObserver::createdInstr(MachineInstr &MI) {
+  PotentialMIsForDebugLocs.insert(&MI);
+}
+
+bool irTranslatorNeverAddsLocations(unsigned Opcode) {
+  switch (Opcode) {
+  default:
+    return false;
+  case TargetOpcode::G_CONSTANT:
+  case TargetOpcode::G_FCONSTANT:
+  case TargetOpcode::G_IMPLICIT_DEF:
+  case TargetOpcode::G_GLOBAL_VALUE:
+    return true;
+  }
+}
+
+void LostDebugLocObserver::erasingInstr(MachineInstr &MI) {
+  if (irTranslatorNeverAddsLocations(MI.getOpcode()))
+    return;
+
+  PotentialMIsForDebugLocs.erase(&MI);
+  if (MI.getDebugLoc())
+    LostDebugLocs.insert(MI.getDebugLoc());
+}
+
+void LostDebugLocObserver::changingInstr(MachineInstr &MI) {
+  if (irTranslatorNeverAddsLocations(MI.getOpcode()))
+    return;
+
+  PotentialMIsForDebugLocs.erase(&MI);
+  if (MI.getDebugLoc())
+    LostDebugLocs.insert(MI.getDebugLoc());
+}
+
+void LostDebugLocObserver::changedInstr(MachineInstr &MI) {
+  PotentialMIsForDebugLocs.insert(&MI);
+}

diff  --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp
index a4fb4443724a..ce6f386a544d 100644
--- a/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp
@@ -6,8 +6,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "GISelMITest.h"
 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "GISelMITest.h"
+#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
+
+#define DEBUG_TYPE "legalizer-test"
 
 using namespace LegalizeActions;
 using namespace LegalizeMutations;
@@ -60,9 +63,10 @@ TEST_F(AArch64GISelMITest, BasicLegalizerTest) {
     return;
 
   ALegalizerInfo LI(MF->getSubtarget());
+  LostDebugLocObserver LocObserver(DEBUG_TYPE);
 
-  Legalizer::MFResult Result =
-      Legalizer::legalizeMachineFunction(*MF, LI, {}, B);
+  Legalizer::MFResult Result = Legalizer::legalizeMachineFunction(
+      *MF, LI, {&LocObserver}, LocObserver, B);
 
   EXPECT_TRUE(isNullMIPtr(Result.FailedOn));
   EXPECT_TRUE(Result.Changed);
@@ -98,6 +102,7 @@ TEST_F(AArch64GISelMITest, UnorderedArtifactCombiningTest) {
     return;
 
   ALegalizerInfo LI(MF->getSubtarget());
+  LostDebugLocObserver LocObserver(DEBUG_TYPE);
 
   // The events here unfold as follows:
   // 1. First, the function is scanned pre-forming the worklist of artifacts:
@@ -153,8 +158,8 @@ TEST_F(AArch64GISelMITest, UnorderedArtifactCombiningTest) {
   //  pair(s) of artifacts that could be immediately combined out. After that
   //  the process follows def-use chains, making them shorter at each step, thus
   //  combining everything that can be combined in O(n) time.
-  Legalizer::MFResult Result =
-      Legalizer::legalizeMachineFunction(*MF, LI, {}, B);
+  Legalizer::MFResult Result = Legalizer::legalizeMachineFunction(
+      *MF, LI, {&LocObserver}, LocObserver, B);
 
   EXPECT_TRUE(isNullMIPtr(Result.FailedOn));
   EXPECT_TRUE(Result.Changed);
@@ -191,9 +196,10 @@ TEST_F(AArch64GISelMITest, UnorderedArtifactCombiningManyCopiesTest) {
     return;
 
   ALegalizerInfo LI(MF->getSubtarget());
+  LostDebugLocObserver LocObserver(DEBUG_TYPE);
 
-  Legalizer::MFResult Result =
-      Legalizer::legalizeMachineFunction(*MF, LI, {}, B);
+  Legalizer::MFResult Result = Legalizer::legalizeMachineFunction(
+      *MF, LI, {&LocObserver}, LocObserver, B);
 
   EXPECT_TRUE(isNullMIPtr(Result.FailedOn));
   EXPECT_TRUE(Result.Changed);


        


More information about the llvm-commits mailing list