[llvm] 969e7ed - [SourceMgr/MLIR diagnostics] Introduce a new method to speed things up

Chris Lattner via llvm-commits llvm-commits at lists.llvm.org
Sat Apr 25 14:08:27 PDT 2020


Author: Chris Lattner
Date: 2020-04-25T14:06:44-07:00
New Revision: 969e7edd88ceb4791eb1cac22828290f0ae30b3d

URL: https://github.com/llvm/llvm-project/commit/969e7edd88ceb4791eb1cac22828290f0ae30b3d
DIFF: https://github.com/llvm/llvm-project/commit/969e7edd88ceb4791eb1cac22828290f0ae30b3d.diff

LOG: [SourceMgr/MLIR diagnostics] Introduce a new method to speed things up

Summary:
This introduces a new SourceMgr::FindLocForLineAndColumn method that
uses the OffsetCache in SourceMgr::SrcBuffer to do do a constant time
lookup for the line number (once the cache is populated).

Use this method in MLIR's SourceMgrDiagnosticHandler::convertLocToSMLoc,
replacing the O(n) scanning logic.  This resolves a long standing TODO
in MLIR, and makes one of my usecases go dramatically faster (which is
currently producing many diagnostics in a 40MB SourceBuffer).

NFC, this is just a performance speedup and cleanup.

Reviewers: rriddle!, ftynse!

Subscribers: hiraditya, mehdi_amini, rriddle, jpienaar, shauheen, antiagainst, nicolasvasilache, arpith-jacob, mgester, lucyrfox, liufengdb, Joonsoo, grosul1, frgossen, Kayjukh, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D78868

Added: 
    

Modified: 
    llvm/include/llvm/Support/SourceMgr.h
    llvm/lib/Support/SourceMgr.cpp
    mlir/lib/IR/Diagnostics.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Support/SourceMgr.h b/llvm/include/llvm/Support/SourceMgr.h
index 1b005519e5d4..3255e143d56b 100644
--- a/llvm/include/llvm/Support/SourceMgr.h
+++ b/llvm/include/llvm/Support/SourceMgr.h
@@ -61,10 +61,9 @@ class SourceMgr {
     /// into relatively small files (often smaller than 2^8 or 2^16 bytes),
     /// we select the offset vector element type dynamically based on the
     /// size of Buffer.
-    using VariableSizeOffsets = PointerUnion<std::vector<uint8_t> *,
-                                             std::vector<uint16_t> *,
-                                             std::vector<uint32_t> *,
-                                             std::vector<uint64_t> *>;
+    using VariableSizeOffsets =
+        PointerUnion<std::vector<uint8_t> *, std::vector<uint16_t> *,
+                     std::vector<uint32_t> *, std::vector<uint64_t> *>;
 
     /// Vector of offsets into Buffer at which there are line-endings
     /// (lazily populated). Once populated, the '\n' that marks the end of
@@ -74,12 +73,17 @@ class SourceMgr {
     /// offset corresponding to a particular SMLoc).
     mutable VariableSizeOffsets OffsetCache;
 
-    /// Populate \c OffsetCache and look up a given \p Ptr in it, assuming
-    /// it points somewhere into \c Buffer. The static type parameter \p T
-    /// must be an unsigned integer type from uint{8,16,32,64}_t large
-    /// enough to store offsets inside \c Buffer.
-    template<typename T>
+    /// Look up a given \p Ptr in in the buffer, determining which line it came
+    /// from.
     unsigned getLineNumber(const char *Ptr) const;
+    template <typename T>
+    unsigned getLineNumberSpecialized(const char *Ptr) const;
+
+    /// Return a pointer to the first character of the specified line number or
+    /// null if the line number is invalid.
+    const char *getPointerForLineNumber(unsigned LineNo) const;
+    template <typename T>
+    const char *getPointerForLineNumberSpecialized(unsigned LineNo) const;
 
     /// This is the location of the parent include, or null if at the top level.
     SMLoc IncludeLoc;
@@ -134,9 +138,7 @@ class SourceMgr {
     return Buffers[i - 1].Buffer.get();
   }
 
-  unsigned getNumBuffers() const {
-    return Buffers.size();
-  }
+  unsigned getNumBuffers() const { return Buffers.size(); }
 
   unsigned getMainFileID() const {
     assert(getNumBuffers());
@@ -184,12 +186,16 @@ class SourceMgr {
   std::pair<unsigned, unsigned> getLineAndColumn(SMLoc Loc,
                                                  unsigned BufferID = 0) const;
 
+  /// Given a line and column number in a mapped buffer, turn it into an SMLoc.
+  /// This will return a null SMLoc if the line/column location is invalid.
+  SMLoc FindLocForLineAndColumn(unsigned BufferID, unsigned LineNo,
+                                unsigned ColNo);
+
   /// Emit a message about the specified location with the specified string.
   ///
   /// \param ShowColors Display colored messages if output is a terminal and
   /// the default error handler is used.
-  void PrintMessage(raw_ostream &OS, SMLoc Loc, DiagKind Kind,
-                    const Twine &Msg,
+  void PrintMessage(raw_ostream &OS, SMLoc Loc, DiagKind Kind, const Twine &Msg,
                     ArrayRef<SMRange> Ranges = None,
                     ArrayRef<SMFixIt> FixIts = None,
                     bool ShowColors = true) const;
@@ -234,13 +240,13 @@ class SMFixIt {
 public:
   // FIXME: Twine.str() is not very efficient.
   SMFixIt(SMLoc Loc, const Twine &Insertion)
-    : Range(Loc, Loc), Text(Insertion.str()) {
+      : Range(Loc, Loc), Text(Insertion.str()) {
     assert(Loc.isValid());
   }
 
   // FIXME: Twine.str() is not very efficient.
   SMFixIt(SMRange R, const Twine &Replacement)
-    : Range(R), Text(Replacement.str()) {
+      : Range(R), Text(Replacement.str()) {
     assert(R.isValid());
   }
 
@@ -274,13 +280,12 @@ class SMDiagnostic {
   SMDiagnostic() = default;
   // Diagnostic with no location (e.g. file not found, command line arg error).
   SMDiagnostic(StringRef filename, SourceMgr::DiagKind Knd, StringRef Msg)
-    : Filename(filename), LineNo(-1), ColumnNo(-1), Kind(Knd), Message(Msg) {}
+      : Filename(filename), LineNo(-1), ColumnNo(-1), Kind(Knd), Message(Msg) {}
 
   // Diagnostic with a location.
-  SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN,
-               int Line, int Col, SourceMgr::DiagKind Kind,
-               StringRef Msg, StringRef LineStr,
-               ArrayRef<std::pair<unsigned,unsigned>> Ranges,
+  SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN, int Line, int Col,
+               SourceMgr::DiagKind Kind, StringRef Msg, StringRef LineStr,
+               ArrayRef<std::pair<unsigned, unsigned>> Ranges,
                ArrayRef<SMFixIt> FixIts = None);
 
   const SourceMgr *getSourceMgr() const { return SM; }
@@ -293,13 +298,9 @@ class SMDiagnostic {
   StringRef getLineContents() const { return LineContents; }
   ArrayRef<std::pair<unsigned, unsigned>> getRanges() const { return Ranges; }
 
-  void addFixIt(const SMFixIt &Hint) {
-    FixIts.push_back(Hint);
-  }
+  void addFixIt(const SMFixIt &Hint) { FixIts.push_back(Hint); }
 
-  ArrayRef<SMFixIt> getFixIts() const {
-    return FixIts;
-  }
+  ArrayRef<SMFixIt> getFixIts() const { return FixIts; }
 
   void print(const char *ProgName, raw_ostream &S, bool ShowColors = true,
              bool ShowKindLabel = true) const;

diff  --git a/llvm/lib/Support/SourceMgr.cpp b/llvm/lib/Support/SourceMgr.cpp
index acb7101ba020..5fe95badd547 100644
--- a/llvm/lib/Support/SourceMgr.cpp
+++ b/llvm/lib/Support/SourceMgr.cpp
@@ -42,7 +42,7 @@ unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
                                    std::string &IncludedFile) {
   IncludedFile = Filename;
   ErrorOr<std::unique_ptr<MemoryBuffer>> NewBufOrErr =
-    MemoryBuffer::getFile(IncludedFile);
+      MemoryBuffer::getFile(IncludedFile);
 
   // If the file didn't exist directly, see if it's in an include path.
   for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBufOrErr;
@@ -69,54 +69,110 @@ unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc) const {
 }
 
 template <typename T>
-unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr) const {
-
-  // Ensure OffsetCache is allocated and populated with offsets of all the
-  // '\n' bytes.
-  std::vector<T> *Offsets = nullptr;
-  if (OffsetCache.isNull()) {
-    Offsets = new std::vector<T>();
-    OffsetCache = Offsets;
-    size_t Sz = Buffer->getBufferSize();
-    assert(Sz <= std::numeric_limits<T>::max());
-    StringRef S = Buffer->getBuffer();
-    for (size_t N = 0; N < Sz; ++N) {
-      if (S[N] == '\n') {
-        Offsets->push_back(static_cast<T>(N));
-      }
-    }
-  } else {
-    Offsets = OffsetCache.get<std::vector<T> *>();
+static std::vector<T> &GetOrCreateOffsetCache(
+    PointerUnion<std::vector<uint8_t> *, std::vector<uint16_t> *,
+                 std::vector<uint32_t> *, std::vector<uint64_t> *> &OffsetCache,
+    MemoryBuffer *Buffer) {
+  if (!OffsetCache.isNull())
+    return *OffsetCache.get<std::vector<T> *>();
+
+  // Lazily fill in the offset cache.
+  auto *Offsets = new std::vector<T>();
+  OffsetCache = Offsets;
+  size_t Sz = Buffer->getBufferSize();
+  assert(Sz <= std::numeric_limits<T>::max());
+  StringRef S = Buffer->getBuffer();
+  for (size_t N = 0; N < Sz; ++N) {
+    if (S[N] == '\n')
+      Offsets->push_back(static_cast<T>(N));
   }
 
+  return *Offsets;
+}
+
+template <typename T>
+unsigned SourceMgr::SrcBuffer::getLineNumberSpecialized(const char *Ptr) const {
+  std::vector<T> &Offsets =
+      GetOrCreateOffsetCache<T>(OffsetCache, Buffer.get());
+
   const char *BufStart = Buffer->getBufferStart();
   assert(Ptr >= BufStart && Ptr <= Buffer->getBufferEnd());
   ptr
diff _t PtrDiff = Ptr - BufStart;
-  assert(PtrDiff >= 0 && static_cast<size_t>(PtrDiff) <= std::numeric_limits<T>::max());
+  assert(PtrDiff >= 0 &&
+         static_cast<size_t>(PtrDiff) <= std::numeric_limits<T>::max());
   T PtrOffset = static_cast<T>(PtrDiff);
 
   // llvm::lower_bound gives the number of EOL before PtrOffset. Add 1 to get
   // the line number.
-  return llvm::lower_bound(*Offsets, PtrOffset) - Offsets->begin() + 1;
+  return llvm::lower_bound(Offsets, PtrOffset) - Offsets.begin() + 1;
+}
+
+/// Look up a given \p Ptr in in the buffer, determining which line it came
+/// from.
+unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr) const {
+  size_t Sz = Buffer->getBufferSize();
+  if (Sz <= std::numeric_limits<uint8_t>::max())
+    return getLineNumberSpecialized<uint8_t>(Ptr);
+  else if (Sz <= std::numeric_limits<uint16_t>::max())
+    return getLineNumberSpecialized<uint16_t>(Ptr);
+  else if (Sz <= std::numeric_limits<uint32_t>::max())
+    return getLineNumberSpecialized<uint32_t>(Ptr);
+  else
+    return getLineNumberSpecialized<uint64_t>(Ptr);
+}
+
+template <typename T>
+const char *SourceMgr::SrcBuffer::getPointerForLineNumberSpecialized(
+    unsigned LineNo) const {
+  std::vector<T> &Offsets =
+      GetOrCreateOffsetCache<T>(OffsetCache, Buffer.get());
+
+  // We start counting line and column numbers from 1.
+  if (LineNo != 0)
+    --LineNo;
+
+  const char *BufStart = Buffer->getBufferStart();
+
+  // The offset cache contains the location of the \n for the specified line,
+  // we want the start of the line.  As such, we look for the previous entry.
+  if (LineNo == 0)
+    return BufStart;
+  if (LineNo > Offsets.size())
+    return nullptr;
+  return BufStart + Offsets[LineNo - 1] + 1;
+}
+
+/// Return a pointer to the first character of the specified line number or
+/// null if the line number is invalid.
+const char *
+SourceMgr::SrcBuffer::getPointerForLineNumber(unsigned LineNo) const {
+  size_t Sz = Buffer->getBufferSize();
+  if (Sz <= std::numeric_limits<uint8_t>::max())
+    return getPointerForLineNumberSpecialized<uint8_t>(LineNo);
+  else if (Sz <= std::numeric_limits<uint16_t>::max())
+    return getPointerForLineNumberSpecialized<uint16_t>(LineNo);
+  else if (Sz <= std::numeric_limits<uint32_t>::max())
+    return getPointerForLineNumberSpecialized<uint32_t>(LineNo);
+  else
+    return getPointerForLineNumberSpecialized<uint64_t>(LineNo);
 }
 
 SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&Other)
-  : Buffer(std::move(Other.Buffer)),
-    OffsetCache(Other.OffsetCache),
-    IncludeLoc(Other.IncludeLoc) {
+    : Buffer(std::move(Other.Buffer)), OffsetCache(Other.OffsetCache),
+      IncludeLoc(Other.IncludeLoc) {
   Other.OffsetCache = nullptr;
 }
 
 SourceMgr::SrcBuffer::~SrcBuffer() {
   if (!OffsetCache.isNull()) {
-    if (OffsetCache.is<std::vector<uint8_t>*>())
-      delete OffsetCache.get<std::vector<uint8_t>*>();
-    else if (OffsetCache.is<std::vector<uint16_t>*>())
-      delete OffsetCache.get<std::vector<uint16_t>*>();
-    else if (OffsetCache.is<std::vector<uint32_t>*>())
-      delete OffsetCache.get<std::vector<uint32_t>*>();
+    if (OffsetCache.is<std::vector<uint8_t> *>())
+      delete OffsetCache.get<std::vector<uint8_t> *>();
+    else if (OffsetCache.is<std::vector<uint16_t> *>())
+      delete OffsetCache.get<std::vector<uint16_t> *>();
+    else if (OffsetCache.is<std::vector<uint32_t> *>())
+      delete OffsetCache.get<std::vector<uint32_t> *>();
     else
-      delete OffsetCache.get<std::vector<uint64_t>*>();
+      delete OffsetCache.get<std::vector<uint64_t> *>();
     OffsetCache = nullptr;
   }
 }
@@ -130,39 +186,58 @@ SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const {
   auto &SB = getBufferInfo(BufferID);
   const char *Ptr = Loc.getPointer();
 
-  size_t Sz = SB.Buffer->getBufferSize();
-  unsigned LineNo;
-  if (Sz <= std::numeric_limits<uint8_t>::max())
-    LineNo = SB.getLineNumber<uint8_t>(Ptr);
-  else if (Sz <= std::numeric_limits<uint16_t>::max())
-    LineNo = SB.getLineNumber<uint16_t>(Ptr);
-  else if (Sz <= std::numeric_limits<uint32_t>::max())
-    LineNo = SB.getLineNumber<uint32_t>(Ptr);
-  else
-    LineNo = SB.getLineNumber<uint64_t>(Ptr);
-
+  unsigned LineNo = SB.getLineNumber(Ptr);
   const char *BufStart = SB.Buffer->getBufferStart();
-  size_t NewlineOffs = StringRef(BufStart, Ptr-BufStart).find_last_of("\n\r");
-  if (NewlineOffs == StringRef::npos) NewlineOffs = ~(size_t)0;
-  return std::make_pair(LineNo, Ptr-BufStart-NewlineOffs);
+  size_t NewlineOffs = StringRef(BufStart, Ptr - BufStart).find_last_of("\n\r");
+  if (NewlineOffs == StringRef::npos)
+    NewlineOffs = ~(size_t)0;
+  return std::make_pair(LineNo, Ptr - BufStart - NewlineOffs);
+}
+
+/// Given a line and column number in a mapped buffer, turn it into an SMLoc.
+/// This will return a null SMLoc if the line/column location is invalid.
+SMLoc SourceMgr::FindLocForLineAndColumn(unsigned BufferID, unsigned LineNo,
+                                         unsigned ColNo) {
+  auto &SB = getBufferInfo(BufferID);
+  const char *Ptr = SB.getPointerForLineNumber(LineNo);
+  if (!Ptr)
+    return SMLoc();
+
+  // We start counting line and column numbers from 1.
+  if (ColNo != 0)
+    --ColNo;
+
+  // If we have a column number, validate it.
+  if (ColNo) {
+    // Make sure the location is within the current line.
+    if (Ptr + ColNo > SB.Buffer->getBufferEnd())
+      return SMLoc();
+
+    // Make sure there is no newline in the way.
+    if (StringRef(Ptr, ColNo).find_first_of("\n\r") != StringRef::npos)
+      return SMLoc();
+
+    Ptr += ColNo;
+  }
+
+  return SMLoc::getFromPointer(Ptr);
 }
 
 void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
-  if (IncludeLoc == SMLoc()) return;  // Top of stack.
+  if (IncludeLoc == SMLoc())
+    return; // Top of stack.
 
   unsigned CurBuf = FindBufferContainingLoc(IncludeLoc);
   assert(CurBuf && "Invalid or unspecified location!");
 
   PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
 
-  OS << "Included from "
-     << getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
+  OS << "Included from " << getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
      << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n";
 }
 
 SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
-                                   const Twine &Msg,
-                                   ArrayRef<SMRange> Ranges,
+                                   const Twine &Msg, ArrayRef<SMRange> Ranges,
                                    ArrayRef<SMFixIt> FixIts) const {
   // First thing to do: find the current buffer containing the specified
   // location to pull out the source line.
@@ -196,7 +271,8 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
     // location.
     for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
       SMRange R = Ranges[i];
-      if (!R.isValid()) continue;
+      if (!R.isValid())
+        continue;
 
       // If the line doesn't contain any part of the range, then ignore it.
       if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
@@ -210,16 +286,16 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
 
       // Translate from SMLoc ranges to column ranges.
       // FIXME: Handle multibyte characters.
-      ColRanges.push_back(std::make_pair(R.Start.getPointer()-LineStart,
-                                         R.End.getPointer()-LineStart));
+      ColRanges.push_back(std::make_pair(R.Start.getPointer() - LineStart,
+                                         R.End.getPointer() - LineStart));
     }
 
     LineAndCol = getLineAndColumn(Loc, CurBuf);
   }
 
   return SMDiagnostic(*this, Loc, BufferID, LineAndCol.first,
-                      LineAndCol.second-1, Kind, Msg.str(),
-                      LineStr, ColRanges, FixIts);
+                      LineAndCol.second - 1, Kind, Msg.str(), LineStr,
+                      ColRanges, FixIts);
 }
 
 void SourceMgr::PrintMessage(raw_ostream &OS, const SMDiagnostic &Diagnostic,
@@ -240,9 +316,9 @@ void SourceMgr::PrintMessage(raw_ostream &OS, const SMDiagnostic &Diagnostic,
 }
 
 void SourceMgr::PrintMessage(raw_ostream &OS, SMLoc Loc,
-                             SourceMgr::DiagKind Kind,
-                             const Twine &Msg, ArrayRef<SMRange> Ranges,
-                             ArrayRef<SMFixIt> FixIts, bool ShowColors) const {
+                             SourceMgr::DiagKind Kind, const Twine &Msg,
+                             ArrayRef<SMRange> Ranges, ArrayRef<SMFixIt> FixIts,
+                             bool ShowColors) const {
   PrintMessage(OS, GetMessage(Loc, Kind, Msg, Ranges, FixIts), ShowColors);
 }
 
@@ -268,7 +344,8 @@ SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN, int Line,
 }
 
 static void buildFixItLine(std::string &CaretLine, std::string &FixItLine,
-                           ArrayRef<SMFixIt> FixIts, ArrayRef<char> SourceLine){
+                           ArrayRef<SMFixIt> FixIts,
+                           ArrayRef<char> SourceLine) {
   if (FixIts.empty())
     return;
 
@@ -277,8 +354,8 @@ static void buildFixItLine(std::string &CaretLine, std::string &FixItLine,
 
   size_t PrevHintEndCol = 0;
 
-  for (ArrayRef<SMFixIt>::iterator I = FixIts.begin(), E = FixIts.end();
-       I != E; ++I) {
+  for (ArrayRef<SMFixIt>::iterator I = FixIts.begin(), E = FixIts.end(); I != E;
+       ++I) {
     // If the fixit contains a newline or tab, ignore it.
     if (I->getText().find_first_of("\n\r\t") != StringRef::npos)
       continue;
@@ -361,12 +438,10 @@ static void printSourceLine(raw_ostream &S, StringRef LineContents) {
   S << '\n';
 }
 
-static bool isNonASCII(char c) {
-  return c & 0x80;
-}
+static bool isNonASCII(char c) { return c & 0x80; }
 
-void SMDiagnostic::print(const char *ProgName, raw_ostream &OS,
-                         bool ShowColors, bool ShowKindLabel) const {
+void SMDiagnostic::print(const char *ProgName, raw_ostream &OS, bool ShowColors,
+                         bool ShowKindLabel) const {
   {
     WithColor S(OS, raw_ostream::SAVEDCOLOR, true, false, !ShowColors);
 
@@ -423,22 +498,21 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &OS,
   size_t NumColumns = LineContents.size();
 
   // Build the line with the caret and ranges.
-  std::string CaretLine(NumColumns+1, ' ');
+  std::string CaretLine(NumColumns + 1, ' ');
 
   // Expand any ranges.
   for (unsigned r = 0, e = Ranges.size(); r != e; ++r) {
     std::pair<unsigned, unsigned> R = Ranges[r];
     std::fill(&CaretLine[R.first],
-              &CaretLine[std::min((size_t)R.second, CaretLine.size())],
-              '~');
+              &CaretLine[std::min((size_t)R.second, CaretLine.size())], '~');
   }
 
   // Add any fix-its.
   // FIXME: Find the beginning of the line properly for multibyte characters.
   std::string FixItInsertionLine;
-  buildFixItLine(CaretLine, FixItInsertionLine, FixIts,
-                 makeArrayRef(Loc.getPointer() - ColumnNo,
-                              LineContents.size()));
+  buildFixItLine(
+      CaretLine, FixItInsertionLine, FixIts,
+      makeArrayRef(Loc.getPointer() - ColumnNo, LineContents.size()));
 
   // Finally, plop on the caret.
   if (unsigned(ColumnNo) <= NumColumns)
@@ -449,7 +523,7 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &OS,
   // ... and remove trailing whitespace so the output doesn't wrap for it.  We
   // know that the line isn't completely empty because it has the caret in it at
   // least.
-  CaretLine.erase(CaretLine.find_last_not_of(' ')+1);
+  CaretLine.erase(CaretLine.find_last_not_of(' ') + 1);
 
   printSourceLine(OS, LineContents);
 

diff  --git a/mlir/lib/IR/Diagnostics.cpp b/mlir/lib/IR/Diagnostics.cpp
index d180b437eefe..ec2ca9363408 100644
--- a/mlir/lib/IR/Diagnostics.cpp
+++ b/mlir/lib/IR/Diagnostics.cpp
@@ -334,32 +334,32 @@ ScopedDiagnosticHandler::~ScopedDiagnosticHandler() {
 namespace mlir {
 namespace detail {
 struct SourceMgrDiagnosticHandlerImpl {
-  /// Get a memory buffer for the given file, or nullptr if one is not found.
-  const llvm::MemoryBuffer *getBufferForFile(llvm::SourceMgr &mgr,
-                                             StringRef filename) {
+  /// Return the SrcManager buffer id for the specified file, or zero if none
+  /// can be found.
+  unsigned getSourceMgrBufferIDForFile(llvm::SourceMgr &mgr,
+                                       StringRef filename) {
     // Check for an existing mapping to the buffer id for this file.
-    auto bufferIt = filenameToBuf.find(filename);
-    if (bufferIt != filenameToBuf.end())
+    auto bufferIt = filenameToBufId.find(filename);
+    if (bufferIt != filenameToBufId.end())
       return bufferIt->second;
 
     // Look for a buffer in the manager that has this filename.
     for (unsigned i = 1, e = mgr.getNumBuffers() + 1; i != e; ++i) {
       auto *buf = mgr.getMemoryBuffer(i);
       if (buf->getBufferIdentifier() == filename)
-        return filenameToBuf[filename] = buf;
+        return filenameToBufId[filename] = i;
     }
 
     // Otherwise, try to load the source file.
-    const llvm::MemoryBuffer *newBuf = nullptr;
     std::string ignored;
-    if (auto newBufID =
-            mgr.AddIncludeFile(std::string(filename), llvm::SMLoc(), ignored))
-      newBuf = mgr.getMemoryBuffer(newBufID);
-    return filenameToBuf[filename] = newBuf;
+    unsigned id =
+        mgr.AddIncludeFile(std::string(filename), llvm::SMLoc(), ignored);
+    filenameToBufId[filename] = id;
+    return id;
   }
 
-  /// Mapping between file name and buffer pointer.
-  llvm::StringMap<const llvm::MemoryBuffer *> filenameToBuf;
+  /// Mapping between file name and buffer ID's.
+  llvm::StringMap<unsigned> filenameToBufId;
 };
 } // end namespace detail
 } // end namespace mlir
@@ -501,68 +501,18 @@ void SourceMgrDiagnosticHandler::emitDiagnostic(Diagnostic &diag) {
 /// Get a memory buffer for the given file, or nullptr if one is not found.
 const llvm::MemoryBuffer *
 SourceMgrDiagnosticHandler::getBufferForFile(StringRef filename) {
-  return impl->getBufferForFile(mgr, filename);
+  if (unsigned id = impl->getSourceMgrBufferIDForFile(mgr, filename))
+    return mgr.getMemoryBuffer(id);
+  return nullptr;
 }
 
 /// Get a memory buffer for the given file, or the main file of the source
 /// manager if one doesn't exist. This always returns non-null.
 llvm::SMLoc SourceMgrDiagnosticHandler::convertLocToSMLoc(FileLineColLoc loc) {
-  // Get the buffer for this filename.
-  auto *membuf = getBufferForFile(loc.getFilename());
-  if (!membuf)
+  unsigned bufferId = impl->getSourceMgrBufferIDForFile(mgr, loc.getFilename());
+  if (!bufferId)
     return llvm::SMLoc();
-
-  // TODO: This should really be upstreamed to be a method on llvm::SourceMgr.
-  // Doing so would allow it to use the offset cache that is already maintained
-  // by SrcBuffer, making this more efficient.
-  unsigned lineNo = loc.getLine();
-  unsigned columnNo = loc.getColumn();
-
-  // Scan for the correct line number.
-  const char *position = membuf->getBufferStart();
-  const char *end = membuf->getBufferEnd();
-
-  // We start counting line and column numbers from 1.
-  if (lineNo != 0)
-    --lineNo;
-  if (columnNo != 0)
-    --columnNo;
-
-  while (position < end && lineNo) {
-    auto curChar = *position++;
-
-    // Scan for newlines.  If this isn't one, ignore it.
-    if (curChar != '\r' && curChar != '\n')
-      continue;
-
-    // We saw a line break, decrement our counter.
-    --lineNo;
-
-    // Check for \r\n and \n\r and treat it as a single escape.  We know that
-    // looking past one character is safe because MemoryBuffer's are always nul
-    // terminated.
-    if (*position != curChar && (*position == '\r' || *position == '\n'))
-      ++position;
-  }
-
-  // If the line/column counter was invalid, return a pointer to the start of
-  // the buffer.
-  if (lineNo || position + columnNo > end)
-    return llvm::SMLoc::getFromPointer(membuf->getBufferStart());
-
-  // If the column is zero, try to skip to the first non-whitespace character.
-  if (columnNo == 0) {
-    auto isNewline = [](char c) { return c == '\n' || c == '\r'; };
-    auto isWhitespace = [](char c) { return c == ' ' || c == '\t'; };
-
-    // Look for a valid non-whitespace character before the next line.
-    for (auto *newPos = position; newPos < end && !isNewline(*newPos); ++newPos)
-      if (!isWhitespace(*newPos))
-        return llvm::SMLoc::getFromPointer(newPos);
-  }
-
-  // Otherwise return the right pointer.
-  return llvm::SMLoc::getFromPointer(position + columnNo);
+  return mgr.FindLocForLineAndColumn(bufferId, loc.getLine(), loc.getColumn());
 }
 
 //===----------------------------------------------------------------------===//


        


More information about the llvm-commits mailing list