[Mlir-commits] [mlir] [mlir] Add FileRange location type. (PR #80213)

Jacques Pienaar llvmlistbot at llvm.org
Wed Jan 31 15:04:19 PST 2024


https://github.com/jpienaar created https://github.com/llvm/llvm-project/pull/80213

This location type represents a contiguous range inside a file. It is conceptually a pair of FileLineCol and size.

Considered:
- Actually nesting a FileLineCol - this has the advantage that everywhere a walk is done for FileLineCol, it still works; but this results in uniqueing additional locations which are unlikely to be used and also one would probably want to specifically handle it so that one gets the best effect.
- Abusing FusedLoc and using a convention but that would result in 3 uniqued locations per range.
- Extending FileLineColLoc to have an optional size. This one would make the naming inaccurate and every FileLineColLoc bigger. (but beyond that didn't feel too terrible).
- Different elements being store in attribute: in particular storing the end line and end column in addition or instead of size. But storing both results in encoding redundant info and wasn't sure the visual benefit in the non-sourgemanager case is worth it. And storing these instead would result in less efficient error emission.

This is a rather minimal change, it does not yet add bindings (C or Python), bytecode encodings, lowering to LLVM debug locations etc. Also not sure if I'm happy with the spelling here.

>From c65e61c18bdcc6e746e7d992d150453d8ff936a9 Mon Sep 17 00:00:00 2001
From: Jacques Pienaar <jpienaar at google.com>
Date: Sat, 6 Jan 2024 13:41:27 -0800
Subject: [PATCH] [mlir] Add FileRange location type.

This location type represents a contiguous range inside a file. It is
conceptually a pair of FileLineCol and size.

Considered:
- Actually nesting a FileLineCol - this has the advantage that everywhere a
  walk is done for FileLineCol, it still works; but this results in uniqueing
  additional locations which are unlikely to be used and also one would probably
  want to specifically handle it so that one gets the best effect.
- Abusing FusedLoc and using a convention but that would result in 3 uniqued
  locations per range.
- Extending FileLineColLoc to have an optional size. This one would make the
  naming inaccurate and every FileLineColLoc bigger. (was close).
- Different elements being store in attribute: in particular storing the end
  line and end column in addition or instead of size. But storing both results
  in encoding redundant info and wasn't sure the visual benefit in the
  non-sourgemanager case is worth it. And storing these instead would result in
  less efficient error emission.

This is a rather minimal change, it does not yet add bindings (C or Python),
bytecode encodings, lowering to LLVM debug locations etc.
---
 .../mlir/IR/BuiltinLocationAttributes.td      | 38 +++++++++++++
 mlir/include/mlir/IR/Diagnostics.h            |  3 +
 mlir/lib/AsmParser/LocationParser.cpp         | 57 +++++++++++++++++++
 mlir/lib/AsmParser/Parser.h                   |  3 +
 mlir/lib/IR/AsmPrinter.cpp                    |  9 +++
 mlir/lib/IR/Diagnostics.cpp                   | 52 ++++++++++++++---
 mlir/lib/IR/Location.cpp                      |  4 +-
 mlir/test/IR/locations.mlir                   |  2 +-
 mlir/unittests/IR/CMakeLists.txt              |  1 +
 mlir/unittests/IR/LocationTest.cpp            | 54 ++++++++++++++++++
 mlir/utils/gdb-scripts/prettyprinters.py      |  1 +
 11 files changed, 212 insertions(+), 12 deletions(-)
 create mode 100644 mlir/unittests/IR/LocationTest.cpp

diff --git a/mlir/include/mlir/IR/BuiltinLocationAttributes.td b/mlir/include/mlir/IR/BuiltinLocationAttributes.td
index dfcc180071f72..7fdce727b7e0a 100644
--- a/mlir/include/mlir/IR/BuiltinLocationAttributes.td
+++ b/mlir/include/mlir/IR/BuiltinLocationAttributes.td
@@ -102,6 +102,44 @@ def FileLineColLoc : Builtin_LocationAttr<"FileLineColLoc"> {
   let attrName = "builtin.file_line_loc";
 }
 
+//===----------------------------------------------------------------------===//
+// FileRange
+//===----------------------------------------------------------------------===//
+
+def FileRangeLoc : Builtin_LocationAttr<"FileRangeLoc"> {
+  let summary = "A file:line:column to line:column source location range";
+  let description = [{
+    Syntax:
+
+    ```
+    filelinecol-location ::= `range(` string-literal `:` integer-literal `:`
+                             integer-literal ` to ` integer-literal `)`
+    ```
+
+    An instance of this location represents a tuple of file, start line number,
+    start column number, end line, and end column number. This represents a range
+    inside a file.
+  }];
+  let parameters = (ins "StringAttr":$filename, "unsigned":$line,
+                        "unsigned":$column, "unsigned":$byte_size);
+  let builders = [
+    AttrBuilderWithInferredContext<(ins "StringAttr":$filename,
+                                        "unsigned":$line,
+                                        "unsigned":$column,
+                                        "unsigned":$byteSize), [{
+      return $_get(filename.getContext(), filename, line, column, byteSize);
+    }]>,
+    AttrBuilder<(ins "StringRef":$filename, "unsigned":$line,
+                     "unsigned":$column, "unsigned":$byteSize), [{
+      return $_get($_ctxt,
+                   StringAttr::get($_ctxt, filename.empty() ? "-" : filename),
+                   line, column, byteSize);
+    }]>
+  ];
+  let skipDefaultBuilders = 1;
+  let attrName = "builtin.file_range";
+}
+
 //===----------------------------------------------------------------------===//
 // FusedLoc
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/IR/Diagnostics.h b/mlir/include/mlir/IR/Diagnostics.h
index 205c5d9274226..9ec6d98d53a57 100644
--- a/mlir/include/mlir/IR/Diagnostics.h
+++ b/mlir/include/mlir/IR/Diagnostics.h
@@ -594,6 +594,9 @@ class SourceMgrDiagnosticHandler : public ScopedDiagnosticHandler {
   /// Convert a location into the given memory buffer into an SMLoc.
   SMLoc convertLocToSMLoc(FileLineColLoc loc);
 
+  /// Convert a location into the given memory buffer into an SMRange.
+  SMRange convertLocToSMRange(FileRangeLoc loc);
+
   /// Given a location, returns the first nested location (including 'loc') that
   /// can be shown to the user.
   std::optional<Location> findLocToShow(Location loc);
diff --git a/mlir/lib/AsmParser/LocationParser.cpp b/mlir/lib/AsmParser/LocationParser.cpp
index fed65c070121d..8ab9a151a90c4 100644
--- a/mlir/lib/AsmParser/LocationParser.cpp
+++ b/mlir/lib/AsmParser/LocationParser.cpp
@@ -20,12 +20,16 @@ using namespace mlir::detail;
 /// Specific location instances.
 ///
 /// location-inst ::= filelinecol-location |
+///                   filerange-location |
 ///                   name-location |
 ///                   callsite-location |
 ///                   fused-location |
 ///                   unknown-location
 /// filelinecol-location ::= string-literal ':' integer-literal
 ///                                         ':' integer-literal
+/// filerange-location ::= 'range(' string-literal ':' integer-literal
+///                                                ':' integer-literal
+///                                                'to' integer-literal ')'
 /// name-location ::= string-literal
 /// callsite-location ::= 'callsite' '(' location-inst 'at' location-inst ')'
 /// fused-location ::= fused ('<' attribute-value '>')?
@@ -98,6 +102,55 @@ ParseResult Parser::parseFusedLocation(LocationAttr &loc) {
   return success();
 }
 
+ParseResult Parser::parseRangeLocation(LocationAttr &loc) {
+  consumeToken(Token::bare_identifier);
+  if (parseToken(Token::l_paren, "expected '(' in FileRangeLoc"))
+    return failure();
+
+  auto *ctx = getContext();
+  auto str = getToken().getStringValue();
+  consumeToken(Token::string);
+
+  if (parseToken(Token::colon, "expected ':' in FileRangeLoc"))
+    return failure();
+
+  // Parse the line number.
+  if (getToken().isNot(Token::integer))
+    return emitWrongTokenError("expected integer line number in FileRangeLoc");
+  auto line = getToken().getUnsignedIntegerValue();
+  if (!line)
+    return emitWrongTokenError("expected integer line number in FileRangeLoc");
+  consumeToken(Token::integer);
+
+  // Parse the ':'.
+  if (parseToken(Token::colon, "expected ':' in FileRangeLoc"))
+    return failure();
+
+  // Parse the column number.
+  if (getToken().isNot(Token::integer))
+    return emitWrongTokenError(
+        "expected integer column number in FileRangeLoc");
+  auto column = getToken().getUnsignedIntegerValue();
+  if (!column.has_value())
+    return emitError("expected integer column number in FileRangeLoc");
+  consumeToken(Token::integer);
+
+  if (parseToken(Token::kw_to, "expected 'to' in FileRangeLoc"))
+    return failure();
+
+  auto size = getToken().getUnsignedIntegerValue();
+  if (!size.has_value())
+    return emitError("expected integer size in FileRangeLoc");
+  consumeToken(Token::integer);
+
+  // Parse the closing ')'.
+  if (parseToken(Token::r_paren, "expected ')' after file range location"))
+    return failure();
+
+  loc = FileRangeLoc::get(ctx, str, *line, *column, *size);
+  return success();
+}
+
 ParseResult Parser::parseNameOrFileLineColLocation(LocationAttr &loc) {
   auto *ctx = getContext();
   auto str = getToken().getStringValue();
@@ -169,6 +222,10 @@ ParseResult Parser::parseLocationInstance(LocationAttr &loc) {
   if (getToken().is(Token::string))
     return parseNameOrFileLineColLocation(loc);
 
+  // Check for the 'range' signifying a range location.
+  if (getToken().getSpelling() == "range")
+    return parseRangeLocation(loc);
+
   // Bare tokens required for other cases.
   if (!getToken().is(Token::bare_identifier))
     return emitWrongTokenError("expected location instance");
diff --git a/mlir/lib/AsmParser/Parser.h b/mlir/lib/AsmParser/Parser.h
index b959e67b8e258..9ea4b3ea6a03a 100644
--- a/mlir/lib/AsmParser/Parser.h
+++ b/mlir/lib/AsmParser/Parser.h
@@ -300,6 +300,9 @@ class Parser {
   /// Parse a name or FileLineCol location instance.
   ParseResult parseNameOrFileLineColLocation(LocationAttr &loc);
 
+  /// Parse a FileRange location instance.
+  ParseResult parseRangeLocation(LocationAttr &loc);
+
   //===--------------------------------------------------------------------===//
   // Affine Parsing
   //===--------------------------------------------------------------------===//
diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp
index 6b8b7473bf0f8..331f76d065976 100644
--- a/mlir/lib/IR/AsmPrinter.cpp
+++ b/mlir/lib/IR/AsmPrinter.cpp
@@ -1991,6 +1991,15 @@ void AsmPrinter::Impl::printLocationInternal(LocationAttr loc, bool pretty,
           printEscapedString(loc.getFilename());
         os << ':' << loc.getLine() << ':' << loc.getColumn();
       })
+      .Case<FileRangeLoc>([&](FileRangeLoc loc) {
+        os << "range(";
+        if (pretty)
+          os << loc.getFilename().getValue();
+        else
+          printEscapedString(loc.getFilename());
+        os << ':' << loc.getLine() << ':' << loc.getColumn() << " to "
+           << loc.getByteSize() << ')';
+      })
       .Case<NameLoc>([&](NameLoc loc) {
         printEscapedString(loc.getName());
 
diff --git a/mlir/lib/IR/Diagnostics.cpp b/mlir/lib/IR/Diagnostics.cpp
index 6b311a90e0de5..35418e2265a3e 100644
--- a/mlir/lib/IR/Diagnostics.cpp
+++ b/mlir/lib/IR/Diagnostics.cpp
@@ -442,10 +442,13 @@ void SourceMgrDiagnosticHandler::emitDiagnostic(Location loc, Twine message,
                                                 DiagnosticSeverity kind,
                                                 bool displaySourceLine) {
   // Extract a file location from this loc.
-  auto fileLoc = loc->findInstanceOf<FileLineColLoc>();
+  auto fileRange = loc->findInstanceOf<FileRangeLoc>();
+  FileLineColLoc fileLoc = nullptr;
+  if (!fileRange)
+    fileLoc = loc->findInstanceOf<FileLineColLoc>();
 
   // If one doesn't exist, then print the raw message without a source location.
-  if (!fileLoc) {
+  if (!fileRange && !fileLoc) {
     std::string str;
     llvm::raw_string_ostream strOS(str);
     if (!llvm::isa<UnknownLoc>(loc))
@@ -457,9 +460,16 @@ void SourceMgrDiagnosticHandler::emitDiagnostic(Location loc, Twine message,
   // Otherwise if we are displaying the source line, try to convert the file
   // location to an SMLoc.
   if (displaySourceLine) {
-    auto smloc = convertLocToSMLoc(fileLoc);
-    if (smloc.isValid())
-      return mgr.PrintMessage(os, smloc, getDiagKind(kind), message);
+    if (fileLoc) {
+      auto smloc = convertLocToSMLoc(fileLoc);
+      if (smloc.isValid())
+        return mgr.PrintMessage(os, smloc, getDiagKind(kind), message);
+    } else if (fileRange) {
+      auto smrange = convertLocToSMRange(fileRange);
+      if (smrange.isValid())
+        return mgr.PrintMessage(os, smrange.Start, getDiagKind(kind), message,
+                                smrange);
+    }
   }
 
   // If the conversion was unsuccessful, create a diagnostic with the file
@@ -467,8 +477,14 @@ void SourceMgrDiagnosticHandler::emitDiagnostic(Location loc, Twine message,
   // the constructor of SMDiagnostic that takes a location.
   std::string locStr;
   llvm::raw_string_ostream locOS(locStr);
-  locOS << fileLoc.getFilename().getValue() << ":" << fileLoc.getLine() << ":"
-        << fileLoc.getColumn();
+  if (fileLoc) {
+    locOS << fileLoc.getFilename().getValue() << ":" << fileLoc.getLine() << ":"
+          << fileLoc.getColumn();
+  } else {
+    locOS << "range(" << fileRange.getFilename().getValue() << ":"
+          << fileRange.getLine() << ":" << fileRange.getColumn() << " to "
+          << fileRange.getByteSize() << ")";
+  }
   llvm::SMDiagnostic diag(locOS.str(), getDiagKind(kind), message.str());
   diag.print(nullptr, os);
 }
@@ -542,6 +558,7 @@ SourceMgrDiagnosticHandler::findLocToShow(Location loc) {
         return findLocToShow(callLoc.getCallee());
       })
       .Case([&](FileLineColLoc) -> std::optional<Location> { return loc; })
+      .Case([&](FileRangeLoc) -> std::optional<Location> { return loc; })
       .Case([&](FusedLoc fusedLoc) -> std::optional<Location> {
         // Fused location is unique in that we try to find a sub-location to
         // show, rather than the top-level location itself.
@@ -563,8 +580,6 @@ SourceMgrDiagnosticHandler::findLocToShow(Location loc) {
       });
 }
 
-/// Get a memory buffer for the given file, or the main file of the source
-/// manager if one doesn't exist. This always returns non-null.
 SMLoc SourceMgrDiagnosticHandler::convertLocToSMLoc(FileLineColLoc loc) {
   // The column and line may be zero to represent unknown column and/or unknown
   /// line/column information.
@@ -577,6 +592,23 @@ SMLoc SourceMgrDiagnosticHandler::convertLocToSMLoc(FileLineColLoc loc) {
   return mgr.FindLocForLineAndColumn(bufferId, loc.getLine(), loc.getColumn());
 }
 
+SMRange SourceMgrDiagnosticHandler::convertLocToSMRange(FileRangeLoc loc) {
+  // The column and line may be zero to represent unknown column and/or unknown
+  /// line/column information.
+  if (loc.getLine() == 0 || loc.getColumn() == 0)
+    return SMRange();
+
+  unsigned bufferId = impl->getSourceMgrBufferIDForFile(mgr, loc.getFilename());
+  if (!bufferId)
+    return SMRange();
+  SMLoc start =
+      mgr.FindLocForLineAndColumn(bufferId, loc.getLine(), loc.getColumn());
+  SMLoc end;
+  if (start.isValid())
+    end = SMLoc::getFromPointer(start.getPointer() + loc.getByteSize());
+  return {start, end};
+}
+
 //===----------------------------------------------------------------------===//
 // SourceMgrDiagnosticVerifierHandler
 //===----------------------------------------------------------------------===//
@@ -846,6 +878,8 @@ void SourceMgrDiagnosticVerifierHandler::process(Diagnostic &diag) {
   if (auto fileLoc = diag.getLocation()->findInstanceOf<FileLineColLoc>())
     return process(fileLoc, diag.str(), kind);
 
+  // TODO: consider handling ranges here too.
+
   emitDiagnostic(diag.getLocation(),
                  "unexpected " + getDiagKindStr(kind) + ": " + diag.str(),
                  DiagnosticSeverity::Error);
diff --git a/mlir/lib/IR/Location.cpp b/mlir/lib/IR/Location.cpp
index c548bbe4b6c86..a4e7b5abc92ca 100644
--- a/mlir/lib/IR/Location.cpp
+++ b/mlir/lib/IR/Location.cpp
@@ -64,8 +64,8 @@ WalkResult LocationAttr::walk(function_ref<WalkResult(Location)> walkFn) {
 
 /// Methods for support type inquiry through isa, cast, and dyn_cast.
 bool LocationAttr::classof(Attribute attr) {
-  return llvm::isa<CallSiteLoc, FileLineColLoc, FusedLoc, NameLoc, OpaqueLoc,
-                   UnknownLoc>(attr);
+  return llvm::isa<CallSiteLoc, FileLineColLoc, FileRangeLoc, FusedLoc, NameLoc,
+                   OpaqueLoc, UnknownLoc>(attr);
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/IR/locations.mlir b/mlir/test/IR/locations.mlir
index 8d7c7e4f13ed4..54f5d60ca7dc6 100644
--- a/mlir/test/IR/locations.mlir
+++ b/mlir/test/IR/locations.mlir
@@ -16,7 +16,7 @@ func.func @inline_notation() -> i32 {
   // CHECK: affine.for %arg0 loc("IVlocation") = 0 to 8 {
   // CHECK: } loc(fused["foo", "mysource.cc":10:8])
   affine.for %i0 loc("IVlocation") = 0 to 8 {
-  } loc(fused["foo", "mysource.cc":10:8])
+  } loc(fused["foo", range("mysource.cc":10:8 to 5)])
 
   // CHECK: } loc(fused<"myPass">["foo", "foo2"])
   affine.if #set0(%2) {
diff --git a/mlir/unittests/IR/CMakeLists.txt b/mlir/unittests/IR/CMakeLists.txt
index 1ed46869c2c8a..0c3b978e4dcc5 100644
--- a/mlir/unittests/IR/CMakeLists.txt
+++ b/mlir/unittests/IR/CMakeLists.txt
@@ -5,6 +5,7 @@ add_mlir_unittest(MLIRIRTests
   InterfaceTest.cpp
   IRMapping.cpp
   InterfaceAttachmentTest.cpp
+  LocationTest.cpp
   OperationSupportTest.cpp
   PatternMatchTest.cpp
   ShapedTypeTest.cpp
diff --git a/mlir/unittests/IR/LocationTest.cpp b/mlir/unittests/IR/LocationTest.cpp
new file mode 100644
index 0000000000000..7475c117d8c3a
--- /dev/null
+++ b/mlir/unittests/IR/LocationTest.cpp
@@ -0,0 +1,54 @@
+//===- LocationTest.cpp -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/IR/Location.h"
+#include "mlir/IR/Diagnostics.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
+
+using namespace mlir;
+
+TEST(LocationTest, FileRange) {
+  MLIRContext context;
+  std::string text = R"(
+    Secti<on o>f text where the error is reported from.
+  )";
+  std::string fileName = "file.mlir";
+  std::unique_ptr<llvm::MemoryBuffer> buffer =
+      llvm::MemoryBuffer::getMemBuffer(text, fileName);
+  std::string str;
+  llvm::raw_string_ostream os(str);
+  llvm::SourceMgr sourceMgr;
+  sourceMgr.AddNewSourceBuffer(std::move(buffer), llvm::SMLoc());
+  SourceMgrDiagnosticHandler sourceMgrHandler(sourceMgr, &context, os);
+
+  auto fileRange = FileRangeLoc::get(&context, fileName, /*line=*/2,
+                                     /*column=*/10, /*byteSize=*/6);
+  sourceMgrHandler.emitDiagnostic(fileRange, "Update this",
+                                  DiagnosticSeverity::Warning, true);
+
+  llvm::MemoryBufferRef resBuffer(os.str(), "result");
+  llvm::line_iterator it(resBuffer);
+  size_t ltIndex = -1, carrotIndex = -2;
+  size_t gtIndex = -3, tildeIndex = -4;
+  for (; !it.is_at_eof(); ++it) {
+    if (size_t id = it->find_first_of('<'); id != StringRef::npos) {
+      ltIndex = id;
+      gtIndex = it->find_last_of('>');
+    }
+    if (size_t id = it->find_first_of('^'); id != StringRef::npos) {
+      carrotIndex = id;
+      tildeIndex = it->find_last_of('~');
+    }
+  }
+  EXPECT_EQ(ltIndex, carrotIndex);
+  EXPECT_EQ(gtIndex, tildeIndex);
+}
diff --git a/mlir/utils/gdb-scripts/prettyprinters.py b/mlir/utils/gdb-scripts/prettyprinters.py
index 45fd0837c9391..5096725f2c71c 100644
--- a/mlir/utils/gdb-scripts/prettyprinters.py
+++ b/mlir/utils/gdb-scripts/prettyprinters.py
@@ -181,6 +181,7 @@ def to_string(self):
     # mlir/IR/Location.h
     "CallSiteLoc",
     "FileLineColLoc",
+    "FileRangeLoc",
     "FusedLoc",
     "NameLoc",
     "OpaqueLoc",



More information about the Mlir-commits mailing list